diff --git a/.github/workflows/scripts/aot-demo.sh b/.github/workflows/scripts/aot-demo.sh index 7c5071a61e362..6b20e58cff1ba 100755 --- a/.github/workflows/scripts/aot-demo.sh +++ b/.github/workflows/scripts/aot-demo.sh @@ -4,7 +4,7 @@ set -ex export TI_SKIP_VERSION_CHECK=ON export TI_CI=1 -export TAICHI_AOT_DEMO_URL=https://github.com/taichi-dev/taichi-aot-demo +export TAICHI_AOT_DEMO_URL=https://github.com/bobcao3/taichi-aot-demo export TAICHI_AOT_DEMO_BRANCH=master export TAICHI_UNITY2_URL=https://github.com/taichi-dev/taichi-unity2 diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 9500bf2544bbf..90a482932247c 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -291,7 +291,7 @@ jobs: . .github/workflows/scripts/common-utils.sh ci-docker-run-amdgpu --name taichi-build \ - registry.taichigraphics.com/taichidev-ubuntu18.04.amdgpu:v0.0.3 \ + registry.taichigraphics.com/taichidev-ubuntu18.04.amdgpu:v0.0.5 \ /home/dev/taichi/.github/workflows/scripts/build.py env: @@ -302,6 +302,7 @@ jobs: -DTI_WITH_VULKAN:BOOL=OFF -DTI_WITH_OPENGL:BOOL=OFF -DTI_BUILD_TESTS:BOOL=ON + -DTI_WITH_AMDGPU:BOOL=ON - name: Test id: test @@ -310,7 +311,7 @@ jobs: . .github/workflows/scripts/common-utils.sh ci-docker-run-amdgpu --name taichi-test \ - registry.taichigraphics.com/taichidev-ubuntu18.04.amdgpu:v0.0.3 \ + registry.taichigraphics.com/taichidev-ubuntu18.04.amdgpu:v0.0.5 \ /home/dev/taichi/.github/workflows/scripts/unix_test.sh env: PY: '3.8' diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a220b8570364..822c604f4baa3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -181,6 +181,10 @@ if (TI_WITH_CUDA) set(CUDA_ARCH "cuda") endif() +if (TI_WITH_AMDGPU) + set(AMDGPU_ARCH "amdgpu") +endif() + if (TI_WITH_DX12) set(DX12_ARCH "dx12") endif() diff --git a/cmake/TaichiCore.cmake b/cmake/TaichiCore.cmake index 241d8df91922a..5b715d4bd3def 100644 --- a/cmake/TaichiCore.cmake +++ b/cmake/TaichiCore.cmake @@ -3,6 +3,7 @@ option(TI_WITH_LLVM "Build with LLVM backends" ON) option(TI_WITH_METAL "Build with the Metal backend" ON) option(TI_WITH_CUDA "Build with the CUDA backend" ON) option(TI_WITH_CUDA_TOOLKIT "Build with the CUDA toolkit" OFF) +option(TI_WITH_AMDGPU "Build with the AMDGPU backend" OFF) option(TI_WITH_OPENGL "Build with the OpenGL backend" ON) option(TI_WITH_CC "Build with the C backend" ON) option(TI_WITH_VULKAN "Build with the Vulkan backend" OFF) @@ -34,6 +35,10 @@ if(ANDROID) set(TI_WITH_DX12 OFF) endif() +if (TI_WITH_AMDGPU AND TI_WITH_CUDA) + message(WARNING "Compiling CUDA and AMDGPU backends simultaneously") +endif() + if(UNIX AND NOT APPLE) # Handy helper for Linux # https://stackoverflow.com/a/32259072/12003165 @@ -53,6 +58,10 @@ if (APPLE) set(TI_WITH_CC OFF) message(WARNING "C backend not supported on OS X. Setting TI_WITH_CC to OFF.") endif() + if (TI_WITH_AMDGPU) + set(TI_WITH_AMDGPU OFF) + message(WARNING "AMDGPU backend not supported on OS X. Setting TI_WITH_AMDGPU to OFF.") + endif() endif() if (WIN32) @@ -60,6 +69,10 @@ if (WIN32) set(TI_WITH_CC OFF) message(WARNING "C backend not supported on Windows. Setting TI_WITH_CC to OFF.") endif() + if (TI_WITH_AMDGPU) + set(TI_WITH_AMDGPU OFF) + message(WARNING "AMDGPU backend not supported on Windows. Setting TI_WITH_AMDGPU to OFF.") + endif() endif() if(TI_WITH_VULKAN) @@ -108,6 +121,12 @@ if (TI_WITH_CUDA) list(APPEND TAICHI_CORE_SOURCE ${TAICHI_CUDA_RUNTIME_SOURCE}) endif() +if (TI_WITH_AMDGPU) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_WITH_AMDGPU") +# file(GLOB TAICHI_AMDGPU_RUNTIME_SOURCE "taichi/runtime/amdgpu/runtime.cpp") + list(APPEND TAIHI_CORE_SOURCE ${TAICHI_AMDGPU_RUNTIME_SOURCE}) +endif() + if (TI_WITH_DX12) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_WITH_DX12") endif() @@ -215,6 +234,12 @@ if(TI_WITH_LLVM) target_link_libraries(${CORE_LIBRARY_NAME} PRIVATE cuda_rhi) endif() + if (TI_WITH_AMDGPU) + llvm_map_components_to_libnames(llvm_amdgpu_libs AMDGPU) + add_subdirectory(taichi/rhi/amdgpu) + target_link_libraries(${CORE_LIBRARY_NAME} PRIVATE amdgpu_rhi) + endif() + if (TI_WITH_DX12) llvm_map_components_to_libnames(llvm_directx_libs DirectX) diff --git a/cpp_examples/rhi_examples/sample_2_triangle.cpp b/cpp_examples/rhi_examples/sample_2_triangle.cpp index 7382abeab20aa..0769b94b3a20d 100644 --- a/cpp_examples/rhi_examples/sample_2_triangle.cpp +++ b/cpp_examples/rhi_examples/sample_2_triangle.cpp @@ -73,6 +73,12 @@ class SampleApp : public App { device->unmap(*vertex_buffer); } + // Define the raster state + { + raster_resources = device->create_raster_resources_unique(); + raster_resources->vertex_buffer(vertex_buffer->get_ptr(0), 0); + } + TI_INFO("App Init Done"); } @@ -94,10 +100,7 @@ class SampleApp : public App { // Bind our triangle pipeline cmdlist->bind_pipeline(pipeline.get()); - // Get the binder and bind our vertex buffer - auto resource_binder = pipeline->resource_binder(); - resource_binder->vertex_buffer(vertex_buffer->get_ptr(0), 0); - cmdlist->bind_resources(resource_binder); + cmdlist->bind_raster_resources(raster_resources.get()); // Render the triangle cmdlist->draw(3, 0); // End rendering @@ -110,9 +113,10 @@ class SampleApp : public App { } public: - std::unique_ptr pipeline; + std::unique_ptr pipeline{nullptr}; + std::unique_ptr raster_resources{nullptr}; - std::unique_ptr vertex_buffer; + std::unique_ptr vertex_buffer{nullptr}; }; int main() { diff --git a/taichi/analysis/gather_statement_usages.cpp b/taichi/analysis/gather_statement_usages.cpp new file mode 100644 index 0000000000000..ba12d58e4a313 --- /dev/null +++ b/taichi/analysis/gather_statement_usages.cpp @@ -0,0 +1,55 @@ +#include "taichi/ir/ir.h" +#include "taichi/ir/statements.h" +#include "taichi/ir/transforms.h" +#include "taichi/ir/visitors.h" + +namespace taichi::lang { + +class GatherStatementUsages : public BasicStmtVisitor { + private: + using BasicStmtVisitor::visit; + + // maps a stmt to all its usages + std::unordered_map>> stmt_usages_; + + public: + explicit GatherStatementUsages() { + invoke_default_visitor = true; + } + + void default_visit(Stmt *stmt) { + auto ops = stmt->get_operands(); + for (int i = 0; i < ops.size(); i++) { + auto &op = ops[i]; + if (op != nullptr) { + stmt_usages_[op].push_back({stmt, i}); + } + } + } + + void visit(Stmt *stmt) override { + default_visit(stmt); + } + + void preprocess_container_stmt(Stmt *stmt) override { + default_visit(stmt); + } + + static std::unordered_map>> run( + IRNode *node) { + GatherStatementUsages pass; + node->accept(&pass); + return pass.stmt_usages_; + } +}; + +namespace irpass::analysis { + +std::unordered_map>> +gather_statement_usages(IRNode *root) { + return GatherStatementUsages::run(root); +} + +} // namespace irpass::analysis + +} // namespace taichi::lang diff --git a/taichi/cache/metal/cache_manager.cpp b/taichi/cache/metal/cache_manager.cpp index 040a769a046da..b19238d31883f 100644 --- a/taichi/cache/metal/cache_manager.cpp +++ b/taichi/cache/metal/cache_manager.cpp @@ -59,13 +59,15 @@ CacheManager::CacheManager(Params &&init_params) if (config_.mode == MemAndDiskCache) { const auto filepath = join_path(config_.cache_path, kMetadataFilename); const auto lock_path = join_path(config_.cache_path, kMetadataLockName); - if (lock_with_file(lock_path)) { - auto _ = make_unlocker(lock_path); - offline_cache::load_metadata_with_checking(cached_data_, filepath); - } else { - TI_WARN( - "Lock {} failed. You can run 'ti cache clean -p {}' and try again.", - lock_path, config_.cache_path); + if (path_exists(filepath)) { + if (lock_with_file(lock_path)) { + auto _ = make_unlocker(lock_path); + offline_cache::load_metadata_with_checking(cached_data_, filepath); + } else { + TI_WARN( + "Lock {} failed. You can run 'ti cache clean -p {}' and try again.", + lock_path, config_.cache_path); + } } } } diff --git a/taichi/codegen/spirv/spirv_codegen.cpp b/taichi/codegen/spirv/spirv_codegen.cpp index ea40d7e82dcdc..92077abdc7f87 100644 --- a/taichi/codegen/spirv/spirv_codegen.cpp +++ b/taichi/codegen/spirv/spirv_codegen.cpp @@ -91,7 +91,7 @@ class TaskCodegen : public IRVisitor { void fill_snode_to_root() { for (int root = 0; root < compiled_structs_.size(); ++root) { - for (auto [node_id, node] : compiled_structs_[root].snode_descriptors) { + for (auto &[node_id, node] : compiled_structs_[root].snode_descriptors) { snode_to_root_[node_id] = root; } } @@ -108,9 +108,6 @@ class TaskCodegen : public IRVisitor { kernel_function_ = ir_->new_function(); // void main(); ir_->debug_name(spv::OpName, kernel_function_, "main"); - compile_args_struct(); - compile_ret_struct(); - if (task_ir_->task_type == OffloadedTaskType::serial) { generate_serial_kernel(task_ir_); } else if (task_ir_->task_type == OffloadedTaskType::range_for) { @@ -1749,22 +1746,21 @@ class TaskCodegen : public IRVisitor { std::vector buffers; if (caps_->get(DeviceCapability::spirv_version) > 0x10300) { buffers = shared_array_binds_; - std::unordered_set unique_bufs; // One buffer can be bound to different bind points but has to be unique // in OpEntryPoint interface declarations. // From Spec: before SPIR-V version 1.4, duplication of these interface id // is tolerated. Starting with version 1.4, an interface id must not // appear more than once. + std::unordered_set entry_point_values; for (const auto &bb : task_attribs_.buffer_binds) { - if (unique_bufs.count(bb.buffer) == 0) { - for (auto &it : buffer_value_map_) { - if (it.first.first == bb.buffer) { - buffers.push_back(it.second); - } + for (auto &it : buffer_value_map_) { + if (it.first.first == bb.buffer) { + entry_point_values.insert(it.second); } - unique_bufs.insert(bb.buffer); } } + buffers.insert(buffers.end(), entry_point_values.begin(), + entry_point_values.end()); } ir_->commit_kernel_function(kernel_function_, "main", buffers, group_size); // kernel entry @@ -2248,12 +2244,16 @@ class TaskCodegen : public IRVisitor { } if (buffer.type == BufferType::Args) { + compile_args_struct(); + buffer_binding_map_[key] = 0; buffer_value_map_[key] = args_buffer_value_; return args_buffer_value_; } if (buffer.type == BufferType::Rets) { + compile_ret_struct(); + buffer_binding_map_[key] = 1; buffer_value_map_[key] = ret_buffer_value_; return ret_buffer_value_; @@ -2537,7 +2537,7 @@ void KernelCodegen::run(TaichiKernelAttributes &kernel_attribs, size_t last_size; bool success = true; - do { + { last_size = optimized_spv.size(); bool result = false; TI_ERROR_IF( @@ -2546,9 +2546,8 @@ void KernelCodegen::run(TaichiKernelAttributes &kernel_attribs, "SPIRV optimization failed"); if (result) { success = false; - break; } - } while (last_size != optimized_spv.size()); + } TI_TRACE("SPIRV-Tools-opt: binary size, before={}, after={}", task_res.spirv_code.size(), optimized_spv.size()); diff --git a/taichi/codegen/spirv/spirv_ir_builder.cpp b/taichi/codegen/spirv/spirv_ir_builder.cpp index ae14731455e6b..7d12da0581c69 100644 --- a/taichi/codegen/spirv/spirv_ir_builder.cpp +++ b/taichi/codegen/spirv/spirv_ir_builder.cpp @@ -835,11 +835,7 @@ Value IRBuilder::fetch_texel(Value texture_var, // OpImageFetch requires operand with OpImageType // We have to extract the underlying OpImage from OpSampledImage here SType image_type = get_underlying_image_type(f32_type(), args.size()); - Value image_val = new_value(image_type, ValueKind::kNormal); - - ib_.begin(spv::OpImage) - .add_seq(image_type, image_val, sampled_image) - .commit(&function_); + Value image_val = make_value(spv::OpImage, image_type, sampled_image); Value uv; if (args.size() == 1) { diff --git a/taichi/codegen/spirv/spirv_ir_builder.h b/taichi/codegen/spirv/spirv_ir_builder.h index a7211b059a534..a981f819953bf 100644 --- a/taichi/codegen/spirv/spirv_ir_builder.h +++ b/taichi/codegen/spirv/spirv_ir_builder.h @@ -86,6 +86,16 @@ struct Value { SType stype; // Additional flags about the value ValueKind flag{ValueKind::kNormal}; + + bool operator==(const Value &rhs) const { + return id == rhs.id; + } +}; + +struct ValueHasher { + size_t operator()(const spirv::Value &v) const { + return std::hash()(v.id); + } }; // Represent the SPIRV Label diff --git a/taichi/ir/analysis.h b/taichi/ir/analysis.h index 69d68ee4a32d1..b477a9649cbd1 100644 --- a/taichi/ir/analysis.h +++ b/taichi/ir/analysis.h @@ -95,6 +95,8 @@ bool definitely_same_address(Stmt *var1, Stmt *var2); std::unordered_set detect_fors_with_break(IRNode *root); std::unordered_set detect_loops_with_continue(IRNode *root); +std::unordered_map>> +gather_statement_usages(IRNode *root); std::unordered_set gather_immutable_local_vars(IRNode *root); std::unordered_set gather_deactivations(IRNode *root); std::pair, std::unordered_set> diff --git a/taichi/ir/ir.cpp b/taichi/ir/ir.cpp index 6e2f9d3695794..cffdf09443c77 100644 --- a/taichi/ir/ir.cpp +++ b/taichi/ir/ir.cpp @@ -4,7 +4,7 @@ #include #include -// #include "taichi/ir/analysis.h" +#include "taichi/ir/analysis.h" #include "taichi/ir/statements.h" #include "taichi/ir/transforms.h" @@ -496,4 +496,16 @@ void DelayedIRModifier::mark_as_modified() { modified_ = true; } +ImmediateIRModifier::ImmediateIRModifier(IRNode *root) { + stmt_usages_ = irpass::analysis::gather_statement_usages(root); +} + +void ImmediateIRModifier::replace_usages_with(Stmt *old_stmt, Stmt *new_stmt) { + if (stmt_usages_.find(old_stmt) == stmt_usages_.end()) + return; + for (auto &[usage, i] : stmt_usages_.at(old_stmt)) { + usage->set_operand(i, new_stmt); + } +} + } // namespace taichi::lang diff --git a/taichi/ir/ir.h b/taichi/ir/ir.h index 929dceccc9d02..12f22794e832e 100644 --- a/taichi/ir/ir.h +++ b/taichi/ir/ir.h @@ -609,6 +609,19 @@ class DelayedIRModifier { void mark_as_modified(); }; +// ImmediateIRModifier aims at replacing Stmt::replace_usages_with, which visits +// the whole tree for a single replacement. ImmediateIRModifier is currently +// associated with a pass, visits the whole tree once at the beginning of that +// pass, and performs a single replacement with amortized constant time. +class ImmediateIRModifier { + private: + std::unordered_map>> stmt_usages_; + + public: + explicit ImmediateIRModifier(IRNode *root); + void replace_usages_with(Stmt *old_stmt, Stmt *new_stmt); +}; + template inline void StmtFieldManager::operator()(const char *key, T &&value) { using decay_T = typename std::decay::type; diff --git a/taichi/program/program.cpp b/taichi/program/program.cpp index 0243dbd93ef68..482dce4a5bf6c 100644 --- a/taichi/program/program.cpp +++ b/taichi/program/program.cpp @@ -484,12 +484,13 @@ intptr_t Program::get_ndarray_data_ptr_as_int(const Ndarray *ndarray) { return reinterpret_cast(data_ptr); } -void Program::fill_ndarray_fast(Ndarray *ndarray, uint32_t val) { +void Program::fill_ndarray_fast_u32(Ndarray *ndarray, uint32_t val) { // This is a temporary solution to bypass device api. // Should be moved to CommandList once available in CUDA. program_impl_->fill_ndarray( ndarray->ndarray_alloc_, - ndarray->get_nelement() * ndarray->get_element_size(), val); + ndarray->get_nelement() * ndarray->get_element_size() / sizeof(uint32_t), + val); } Program::~Program() { diff --git a/taichi/program/program.h b/taichi/program/program.h index 48b4014edb736..941007fde7583 100644 --- a/taichi/program/program.h +++ b/taichi/program/program.h @@ -339,7 +339,7 @@ class TI_DLL_EXPORT Program { intptr_t get_ndarray_data_ptr_as_int(const Ndarray *ndarray); - void fill_ndarray_fast(Ndarray *ndarray, uint32_t val); + void fill_ndarray_fast_u32(Ndarray *ndarray, uint32_t val); ASTBuilder *current_ast_builder() { return current_callable ? ¤t_callable->context->builder() : nullptr; diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index fd24a8be163a6..19320c89b9569 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -476,16 +476,16 @@ void export_lang(py::module &m) { }) .def("fill_float", [](Program *program, Ndarray *ndarray, float val) { - program->fill_ndarray_fast(ndarray, - reinterpret_cast(val)); + program->fill_ndarray_fast_u32(ndarray, + reinterpret_cast(val)); }) .def("fill_int", [](Program *program, Ndarray *ndarray, int32_t val) { - program->fill_ndarray_fast(ndarray, - reinterpret_cast(val)); + program->fill_ndarray_fast_u32(ndarray, + reinterpret_cast(val)); }) .def("fill_uint", [](Program *program, Ndarray *ndarray, uint32_t val) { - program->fill_ndarray_fast(ndarray, val); + program->fill_ndarray_fast_u32(ndarray, val); }); py::class_(m, "AotModuleBuilder") diff --git a/taichi/rhi/amdgpu/CMakeLists.txt b/taichi/rhi/amdgpu/CMakeLists.txt new file mode 100644 index 0000000000000..8c6e42417bb13 --- /dev/null +++ b/taichi/rhi/amdgpu/CMakeLists.txt @@ -0,0 +1,21 @@ +# ./taichi/rhi/amdgpu/CMakeLists.txt + +set(AMDGPU_RHI amdgpu_rhi) +add_library(${AMDGPU_RHI}) +target_sources(${AMDGPU_RHI} + PRIVATE + amdgpu_device.cpp + amdgpu_caching_allocator.cpp + amdgpu_context.cpp + amdgpu_driver.cpp + ) + +target_include_directories(${AMDGPU_RHI} + PRIVATE + ${PROJECT_SOURCE_DIR} + ${PROJECT_SOURCE_DIR}/external/eigen + ${PROJECT_SOURCE_DIR}/external/spdlog/include + ${LLVM_INCLUDE_DIRS} + ) + +target_link_libraries(${AMDGPU_RHI} PRIVATE interop_rhi) diff --git a/taichi/rhi/amdgpu/amdgpu_caching_allocator.cpp b/taichi/rhi/amdgpu/amdgpu_caching_allocator.cpp new file mode 100644 index 0000000000000..4e6418e96518e --- /dev/null +++ b/taichi/rhi/amdgpu/amdgpu_caching_allocator.cpp @@ -0,0 +1,40 @@ +#include "taichi/rhi/amdgpu/amdgpu_caching_allocator.h" + +namespace taichi { +namespace lang { +namespace amdgpu { + +AmdgpuCachingAllocator::AmdgpuCachingAllocator(LlvmDevice *device) + : device_(device) { +} + +uint64_t *AmdgpuCachingAllocator::allocate( + const LlvmDevice::LlvmRuntimeAllocParams ¶ms) { + uint64_t *ret{nullptr}; + auto size_aligned = taichi::iroundup(params.size, taichi_page_size); + auto it_blk = mem_blocks_.lower_bound(size_aligned); + + if (it_blk != mem_blocks_.end()) { + size_t remaining_sz = it_blk->first - size_aligned; + if (remaining_sz > 0) { + TI_ASSERT(remaining_sz % taichi_page_size == 0); + auto remaining_head = + reinterpret_cast(it_blk->second) + size_aligned; + mem_blocks_.insert( + {remaining_sz, reinterpret_cast(remaining_head)}); + } + ret = it_blk->second; + mem_blocks_.erase(it_blk); + } else { + ret = device_->allocate_llvm_runtime_memory_jit(params); + } + return ret; +} + +void AmdgpuCachingAllocator::release(size_t sz, uint64_t *ptr) { + mem_blocks_.insert({sz, ptr}); +} + +} // namespace amdgpu +} // namespace lang +} // namespace taichi diff --git a/taichi/rhi/amdgpu/amdgpu_caching_allocator.h b/taichi/rhi/amdgpu/amdgpu_caching_allocator.h new file mode 100644 index 0000000000000..bebcefdf16324 --- /dev/null +++ b/taichi/rhi/amdgpu/amdgpu_caching_allocator.h @@ -0,0 +1,28 @@ +#pragma once + +#include "taichi/common/core.h" +#include "taichi/math/arithmetic.h" +#include "taichi/rhi/llvm/llvm_device.h" +#include "taichi/inc/constants.h" +#include +#include + +namespace taichi { +namespace lang { +namespace amdgpu { + +class AmdgpuCachingAllocator { + public: + AmdgpuCachingAllocator(LlvmDevice *device); + + uint64_t *allocate(const LlvmDevice::LlvmRuntimeAllocParams ¶ms); + void release(size_t sz, uint64_t *ptr); + + private: + std::multimap mem_blocks_; + LlvmDevice *device_{nullptr}; +}; + +} // namespace amdgpu +} // namespace lang +} // namespace taichi diff --git a/taichi/rhi/amdgpu/amdgpu_context.cpp b/taichi/rhi/amdgpu/amdgpu_context.cpp new file mode 100644 index 0000000000000..04fb173a0e73b --- /dev/null +++ b/taichi/rhi/amdgpu/amdgpu_context.cpp @@ -0,0 +1,93 @@ +#define TI_RUNTIME_HOST +#include "amdgpu_context.h" + +#include +#include + +#include "taichi/util/lang_util.h" +#include "taichi/program/program.h" +#include "taichi/system/threading.h" +#include "taichi/rhi/amdgpu/amdgpu_driver.h" +#include "taichi/analysis/offline_cache_util.h" + +namespace taichi { +namespace lang { + +AMDGPUContext::AMDGPUContext() + : driver_(AMDGPUDriver::get_instance_without_context()) { + dev_count_ = 0; + driver_.init(0); + driver_.device_get_count(&dev_count_); + driver_.device_get(&device_, 0); + + char name[128]; + driver_.device_get_name(name, 128, device_); + + TI_TRACE("Using AMDGPU device [id=0]: {}", name); + + driver_.context_create(&context_, 0, device_); + + const auto GB = std::pow(1024.0, 3.0); + TI_TRACE("Total memory {:.2f} GB; free memory {:.2f} GB", + get_total_memory() / GB, get_free_memory() / GB); + + void *hip_device_prop = std::malloc(HIP_DEVICE_PROPERTIES_STRUCT_SIZE); + driver_.device_get_prop(hip_device_prop, device_); + compute_capability_ = *((int *)hip_device_prop + HIP_DEVICE_GCN_ARCH); + std::free(hip_device_prop); + + mcpu_ = fmt::format("gfx{}", compute_capability_); + + TI_TRACE("Emitting AMDGPU code for {}", mcpu_); +} + +std::size_t AMDGPUContext::get_total_memory() { + std::size_t ret, _; + driver_.mem_get_info(&_, &ret); + return ret; +} + +std::size_t AMDGPUContext::get_free_memory() { + std::size_t ret, _; + driver_.mem_get_info(&ret, &_); + return ret; +} + +std::string AMDGPUContext::get_device_name() { + constexpr uint32_t kMaxNameStringLength = 128; + char name[kMaxNameStringLength]; + driver_.device_get_name(name, kMaxNameStringLength /*=128*/, device_); + std::string str(name); + return str; +} + +void AMDGPUContext::launch(void *func, + const std::string &task_name, + void *arg_pointers, + unsigned grid_dim, + unsigned block_dim, + std::size_t dynamic_shared_mem_bytes, + int arg_bytes) { + if (grid_dim > 0) { + std::lock_guard _(lock_); + void *config[] = {(void *)0x01, const_cast(arg_pointers), + (void *)0x02, &arg_bytes, (void *)0x03}; + driver_.launch_kernel(func, grid_dim, 1, 1, block_dim, 1, 1, + dynamic_shared_mem_bytes, nullptr, nullptr, + reinterpret_cast(&config)); + } + if (debug_) { + driver_.stream_synchronize(nullptr); + } +} + +AMDGPUContext::~AMDGPUContext() { +} + +AMDGPUContext &AMDGPUContext::get_instance() { + static auto context = new AMDGPUContext(); + return *context; +} + +} // namespace lang +} // namespace taichi diff --git a/taichi/rhi/amdgpu/amdgpu_context.h b/taichi/rhi/amdgpu/amdgpu_context.h new file mode 100644 index 0000000000000..7e182e07ea3d7 --- /dev/null +++ b/taichi/rhi/amdgpu/amdgpu_context.h @@ -0,0 +1,99 @@ +#pragma once + +#include +#include +#include + +#include "taichi/program/kernel_profiler.h" +#include "taichi/rhi/amdgpu/amdgpu_driver.h" + +namespace taichi { +namespace lang { + +class AMDGPUDriver; + +class AMDGPUContext { + private: + void *device_; + void *context_; + int dev_count_; + int compute_capability_; + std::string mcpu_; + std::mutex lock_; + AMDGPUDriver &driver_; + bool debug_; + + public: + AMDGPUContext(); + + std::size_t get_total_memory(); + std::size_t get_free_memory(); + std::string get_device_name(); + + bool detected() const { + return dev_count_ != 0; + } + + void launch(void *func, + const std::string &task_name, + void *arg_pointers, + unsigned grid_dim, + unsigned block_dim, + std::size_t dynamic_shared_mem_bytes, + int arg_bytes); + + void set_debug(bool debug) { + debug_ = debug; + } + + std::string get_mcpu() const { + return mcpu_; + } + + void *get_context() { + return context_; + } + + void make_current() { + driver_.context_set_current(context_); + } + + int get_compute_capability() const { + return compute_capability_; + } + + ~AMDGPUContext(); + + class ContextGuard { + private: + void *old_ctx_; + void *new_ctx_; + + public: + ContextGuard(AMDGPUContext *new_ctx) + : old_ctx_(nullptr), new_ctx_(new_ctx) { + AMDGPUDriver::get_instance().context_get_current(&old_ctx_); + if (old_ctx_ != new_ctx) + new_ctx->make_current(); + } + + ~ContextGuard() { + if (old_ctx_ != new_ctx_) { + AMDGPUDriver::get_instance().context_set_current(old_ctx_); + } + } + }; + + ContextGuard get_guard() { + return ContextGuard(this); + } + + std::unique_lock get_lock_guard() { + return std::unique_lock(lock_); + } + + static AMDGPUContext &get_instance(); +}; + +} // namespace lang +} // namespace taichi diff --git a/taichi/rhi/amdgpu/amdgpu_device.cpp b/taichi/rhi/amdgpu/amdgpu_device.cpp new file mode 100644 index 0000000000000..75df5cdd5a598 --- /dev/null +++ b/taichi/rhi/amdgpu/amdgpu_device.cpp @@ -0,0 +1,134 @@ +#include "taichi/rhi/amdgpu/amdgpu_device.h" + +namespace taichi { +namespace lang { + +namespace amdgpu { + +AmdgpuDevice::AllocInfo AmdgpuDevice::get_alloc_info( + const DeviceAllocation handle) { + validate_device_alloc(handle); + return allocations_[handle.alloc_id]; +} + +DeviceAllocation AmdgpuDevice::allocate_memory(const AllocParams ¶ms) { + AllocInfo info; + + if (params.host_read || params.host_write) { + AMDGPUDriver::get_instance().malloc_managed(&info.ptr, params.size, + HIP_MEM_ATTACH_GLOBAL); + } else { + AMDGPUDriver::get_instance().malloc(&info.ptr, params.size); + } + + info.size = params.size; + info.is_imported = false; + info.use_cached = false; + info.use_preallocated = false; + + DeviceAllocation alloc; + alloc.alloc_id = allocations_.size(); + alloc.device = this; + + allocations_.push_back(info); + return alloc; +} + +DeviceAllocation AmdgpuDevice::allocate_memory_runtime( + const LlvmRuntimeAllocParams ¶ms) { + AllocInfo info; + info.size = taichi::iroundup(params.size, taichi_page_size); + if (params.host_read || params.host_write) { + TI_NOT_IMPLEMENTED + } else if (params.use_cached) { + if (caching_allocator_ == nullptr) { + caching_allocator_ = std::make_unique(this); + } + info.ptr = caching_allocator_->allocate(params); + AMDGPUDriver::get_instance().memset((void *)info.ptr, 0, info.size); + } else { + info.ptr = allocate_llvm_runtime_memory_jit(params); + } + info.is_imported = false; + info.use_cached = params.use_cached; + info.use_preallocated = true; + + DeviceAllocation alloc; + alloc.alloc_id = allocations_.size(); + alloc.device = this; + + allocations_.push_back(info); + return alloc; +} + +void AmdgpuDevice::dealloc_memory(DeviceAllocation handle) { + validate_device_alloc(handle); + AllocInfo &info = allocations_[handle.alloc_id]; + if (info.ptr == nullptr) { + TI_ERROR("the DeviceAllocation is already deallocated"); + } + TI_ASSERT(!info.is_imported); + if (info.use_cached) { + if (caching_allocator_ == nullptr) { + TI_ERROR("the AmdgpuCachingAllocator is not initialized"); + } + caching_allocator_->release(info.size, (uint64_t *)info.ptr); + } else if (!info.use_preallocated) { + AMDGPUDriver::get_instance().mem_free(info.ptr); + info.ptr = nullptr; + } +} + +RhiResult AmdgpuDevice::map(DeviceAllocation alloc, void **mapped_ptr) { + AllocInfo &info = allocations_[alloc.alloc_id]; + size_t size = info.size; + info.mapped = new char[size]; + // FIXME: there should be a better way to do this... + AMDGPUDriver::get_instance().memcpy_device_to_host(info.mapped, info.ptr, + size); + *mapped_ptr = info.mapped; + return RhiResult::success; +} + +void AmdgpuDevice::unmap(DeviceAllocation alloc) { + AllocInfo &info = allocations_[alloc.alloc_id]; + AMDGPUDriver::get_instance().memcpy_host_to_device(info.ptr, info.mapped, + info.size); + delete[] static_cast(info.mapped); + return; +} + +void AmdgpuDevice::memcpy_internal(DevicePtr dst, + DevicePtr src, + uint64_t size) { + void *dst_ptr = + static_cast(allocations_[dst.alloc_id].ptr) + dst.offset; + void *src_ptr = + static_cast(allocations_[src.alloc_id].ptr) + src.offset; + AMDGPUDriver::get_instance().memcpy_device_to_device(dst_ptr, src_ptr, size); +} + +DeviceAllocation AmdgpuDevice::import_memory(void *ptr, size_t size) { + AllocInfo info; + info.ptr = ptr; + info.size = size; + info.is_imported = true; + + DeviceAllocation alloc; + alloc.alloc_id = allocations_.size(); + alloc.device = this; + + allocations_.push_back(info); + return alloc; +} + +uint64 AmdgpuDevice::fetch_result_uint64(int i, uint64 *result_buffer) { + AMDGPUDriver::get_instance().stream_synchronize(nullptr); + uint64 ret; + AMDGPUDriver::get_instance().memcpy_device_to_host(&ret, result_buffer + i, + sizeof(uint64)); + return ret; +} +} // namespace amdgpu +} // namespace lang +} // namespace taichi diff --git a/taichi/rhi/amdgpu/amdgpu_device.h b/taichi/rhi/amdgpu/amdgpu_device.h new file mode 100644 index 0000000000000..ab567b746d7ed --- /dev/null +++ b/taichi/rhi/amdgpu/amdgpu_device.h @@ -0,0 +1,110 @@ +#pragma once +#include +#include + +#include "taichi/common/core.h" +#include "taichi/rhi/amdgpu/amdgpu_driver.h" +#include "taichi/rhi/amdgpu/amdgpu_caching_allocator.h" +#include "taichi/rhi/amdgpu/amdgpu_context.h" +#include "taichi/rhi/llvm/llvm_device.h" + +namespace taichi { +namespace lang { +namespace amdgpu { + +class AmdgpuCommandList : public CommandList { + public: + ~AmdgpuCommandList() override { + } + + void bind_pipeline(Pipeline *p) override{TI_NOT_IMPLEMENTED}; + RhiResult bind_shader_resources(ShaderResourceSet *res, + int set_index = 0) final{TI_NOT_IMPLEMENTED}; + RhiResult bind_raster_resources(RasterResources *res) final{ + TI_NOT_IMPLEMENTED}; + void buffer_barrier(DevicePtr ptr, size_t size) override{TI_NOT_IMPLEMENTED}; + void buffer_barrier(DeviceAllocation alloc) override{TI_NOT_IMPLEMENTED}; + void memory_barrier() override{TI_NOT_IMPLEMENTED}; + void buffer_copy(DevicePtr dst, DevicePtr src, size_t size) override{ + TI_NOT_IMPLEMENTED}; + void buffer_fill(DevicePtr ptr, size_t size, uint32_t data) override{ + TI_NOT_IMPLEMENTED}; + void dispatch(uint32_t x, uint32_t y = 1, uint32_t z = 1) override{ + TI_NOT_IMPLEMENTED}; +}; + +class AmdgpuStream : public Stream { + public: + ~AmdgpuStream() override{}; + + std::unique_ptr new_command_list() override{TI_NOT_IMPLEMENTED}; + StreamSemaphore submit(CommandList *cmdlist, + const std::vector &wait_semaphores = + {}) override{TI_NOT_IMPLEMENTED}; + StreamSemaphore submit_synced( + CommandList *cmdlist, + const std::vector &wait_semaphores = {}) override{ + TI_NOT_IMPLEMENTED}; + + void command_sync() override{TI_NOT_IMPLEMENTED}; +}; + +class AmdgpuDevice : public LlvmDevice { + public: + struct AllocInfo { + void *ptr{nullptr}; + size_t size{0}; + bool is_imported{false}; + bool use_preallocated{true}; + bool use_cached{false}; + void *mapped{nullptr}; + }; + + AllocInfo get_alloc_info(const DeviceAllocation handle); + + ~AmdgpuDevice() override{}; + + DeviceAllocation allocate_memory(const AllocParams ¶ms) override; + DeviceAllocation allocate_memory_runtime( + const LlvmRuntimeAllocParams ¶ms) override; + void dealloc_memory(DeviceAllocation handle) override; + + ShaderResourceSet *create_resource_set() final{TI_NOT_IMPLEMENTED}; + + std::unique_ptr create_pipeline( + const PipelineSourceDesc &src, + std::string name = "Pipeline") override{TI_NOT_IMPLEMENTED}; + + uint64 fetch_result_uint64(int i, uint64 *result_buffer) override; + + RhiResult map_range(DevicePtr ptr, uint64_t size, void **mapped_ptr) final { + TI_NOT_IMPLEMENTED; + } + RhiResult map(DeviceAllocation alloc, void **mapped_ptr) final; + + void unmap(DevicePtr ptr) override{TI_NOT_IMPLEMENTED}; + void unmap(DeviceAllocation alloc) override; + + void memcpy_internal(DevicePtr dst, DevicePtr src, uint64_t size) override; + + DeviceAllocation import_memory(void *ptr, size_t size); + + Stream *get_compute_stream() override{TI_NOT_IMPLEMENTED}; + + void wait_idle() override{TI_NOT_IMPLEMENTED}; + + private: + std::vector allocations_; + void validate_device_alloc(const DeviceAllocation alloc) { + if (allocations_.size() <= alloc.alloc_id) { + TI_ERROR("invalid DeviceAllocation"); + } + } + std::unique_ptr caching_allocator_{nullptr}; +}; + +} // namespace amdgpu + +} // namespace lang + +} // namespace taichi diff --git a/taichi/rhi/amdgpu/amdgpu_driver.cpp b/taichi/rhi/amdgpu/amdgpu_driver.cpp new file mode 100644 index 0000000000000..ee47a481cea74 --- /dev/null +++ b/taichi/rhi/amdgpu/amdgpu_driver.cpp @@ -0,0 +1,82 @@ +#include "taichi/rhi/amdgpu/amdgpu_driver.h" + +#include "taichi/system/dynamic_loader.h" +#include "taichi/rhi/amdgpu/amdgpu_context.h" +#include "taichi/util/environ_config.h" + +namespace taichi { +namespace lang { + +std::string get_amdgpu_error_message(uint32 err) { + auto err_name_ptr = + AMDGPUDriver::get_instance_without_context().get_error_name(err); + auto err_string_ptr = + AMDGPUDriver::get_instance_without_context().get_error_string(err); + return fmt::format("AMDGPU Error {}: {}", err_name_ptr, err_string_ptr); +} + +AMDGPUDriverBase::AMDGPUDriverBase() { + disabled_by_env_ = (get_environ_config("TI_ENABLE_AMDGPU", 1) == 0); + if (disabled_by_env_) { + TI_TRACE( + "AMDGPU driver disabled by enviroment variable \"TI_ENABLE_AMDGPU\"."); + } +} + +bool AMDGPUDriverBase::load_lib(std::string lib_linux) { +#if defined(TI_PLATFORM_LINUX) + auto lib_name = lib_linux; +#else + static_assert(false, "Taichi AMDGPU driver supports only Linux."); +#endif + + loader_ = std::make_unique(lib_name); + if (!loader_->loaded()) { + TI_WARN("{} lib not found.", lib_name); + return false; + } else { + TI_TRACE("{} loaded!", lib_name); + return true; + } +} + +bool AMDGPUDriver::detected() { + return !disabled_by_env_ && loader_->loaded(); +} + +AMDGPUDriver::AMDGPUDriver() { + if (!load_lib("libamdhip64.so")) + return; + + loader_->load_function("hipGetErrorName", get_error_name); + loader_->load_function("hipGetErrorString", get_error_string); + loader_->load_function("hipDriverGetVersion", driver_get_version); + + int version; + driver_get_version(&version); + TI_TRACE("AMDGPU driver API (v{}.{}) loaded.", version / 1000, + version % 1000 / 10); + +#define PER_AMDGPU_FUNCTION(name, symbol_name, ...) \ + name.set(loader_->load_function(#symbol_name)); \ + name.set_lock(&lock_); \ + name.set_names(#name, #symbol_name); +#include "taichi/rhi/amdgpu/amdgpu_driver_functions.inc.h" +#undef PER_AMDGPU_FUNCTION +} + +AMDGPUDriver &AMDGPUDriver::get_instance_without_context() { + // Thread safety guaranteed by C++ compiler + // Note this is never deleted until the process finishes + static AMDGPUDriver *instance = new AMDGPUDriver(); + return *instance; +} + +AMDGPUDriver &AMDGPUDriver::get_instance() { + // initialize the AMDGPU context so that the driver APIs can be called later + AMDGPUContext::get_instance(); + return get_instance_without_context(); +} + +} // namespace lang +} // namespace taichi diff --git a/taichi/rhi/amdgpu/amdgpu_driver.h b/taichi/rhi/amdgpu/amdgpu_driver.h new file mode 100644 index 0000000000000..85f8fc77d0030 --- /dev/null +++ b/taichi/rhi/amdgpu/amdgpu_driver.h @@ -0,0 +1,119 @@ +#pragma once + +#include + +#include "taichi/system/dynamic_loader.h" + +namespace taichi { +namespace lang { + +constexpr uint32 HIP_EVENT_DEFAULT = 0x0; +constexpr uint32 HIP_STREAM_DEFAULT = 0x0; +constexpr uint32 HIP_STREAM_NON_BLOCKING = 0x1; +constexpr uint32 HIP_MEM_ATTACH_GLOBAL = 0x1; +constexpr uint32 HIP_MEM_ADVISE_SET_PREFERRED_LOCATION = 3; +constexpr uint32 HIP_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 26; +constexpr uint32 HIP_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 63; +constexpr uint32 HIP_DEVICE_PROPERTIES_STRUCT_SIZE = 792; +constexpr uint32 HIP_DEVICE_GCN_ARCH = 98; +constexpr uint32 HIP_ERROR_ASSERT = 710; +constexpr uint32 HIP_JIT_MAX_REGISTERS = 0; +constexpr uint32 HIP_POINTER_ATTRIBUTE_MEMORY_TYPE = 2; +constexpr uint32 HIP_SUCCESS = 0; +constexpr uint32 HIP_MEMORYTYPE_DEVICE = 1; + +std::string get_amdgpu_error_message(uint32 err); + +template +class AMDGPUFunction { + public: + AMDGPUFunction() { + function_ = nullptr; + } + + void set(void *func_ptr) { + function_ = (func_type *)func_ptr; + } + + uint32 call(Args... args) { + TI_ASSERT(function_ != nullptr); + TI_ASSERT(driver_lock_ != nullptr); + std::lock_guard _(*driver_lock_); + return (uint32)function_(args...); + } + + void set_names(const std::string &name, const std::string &symbol_name) { + name_ = name; + symbol_name_ = symbol_name; + } + + void set_lock(std::mutex *lock) { + driver_lock_ = lock; + } + + std::string get_error_message(uint32 err) { + return get_amdgpu_error_message(err) + + fmt::format(" while calling {} ({})", name_, symbol_name_); + } + + uint32 call_with_warning(Args... args) { + auto err = call(args...); + TI_WARN_IF(err, "{}", get_error_message(err)); + return err; + } + + void operator()(Args... args) { + auto err = call(args...); + TI_ERROR_IF(err, get_error_message(err)); + } + + private: + using func_type = uint32_t(Args...); + + func_type *function_{nullptr}; + std::string name_, symbol_name_; + std::mutex *driver_lock_{nullptr}; +}; + +class AMDGPUDriverBase { + public: + ~AMDGPUDriverBase() = default; + + protected: + std::unique_ptr loader_; + AMDGPUDriverBase(); + + bool load_lib(std::string lib_linux); + + bool disabled_by_env_{false}; +}; + +class AMDGPUDriver : protected AMDGPUDriverBase { + public: +#define PER_AMDGPU_FUNCTION(name, symbol_name, ...) \ + AMDGPUFunction<__VA_ARGS__> name; +#include "taichi/rhi/amdgpu/amdgpu_driver_functions.inc.h" +#undef PER_AMDGPU_FUNCTION + + char (*get_error_name)(uint32); + + char (*get_error_string)(uint32); + + void (*driver_get_version)(int *); + + bool detected(); + + static AMDGPUDriver &get_instance(); + + static AMDGPUDriver &get_instance_without_context(); + + private: + AMDGPUDriver(); + + std::mutex lock_; + + // bool rocm_version_valid_{false}; +}; + +} // namespace lang +} // namespace taichi diff --git a/taichi/rhi/amdgpu/amdgpu_driver_functions.inc.h b/taichi/rhi/amdgpu/amdgpu_driver_functions.inc.h new file mode 100644 index 0000000000000..a4f0e6fdea1c3 --- /dev/null +++ b/taichi/rhi/amdgpu/amdgpu_driver_functions.inc.h @@ -0,0 +1,127 @@ +// Init +PER_AMDGPU_FUNCTION(init, hipInit, unsigned int); + +// Device management +PER_AMDGPU_FUNCTION(device_get_count, hipGetDeviceCount, int *); +PER_AMDGPU_FUNCTION(device_get_attribute, + hipDeviceGetAttribute, + int *, + uint32, + int); +PER_AMDGPU_FUNCTION(device_get_prop, hipGetDeviceProperties, void *, void *); +PER_AMDGPU_FUNCTION(device_get_name, hipDeviceGetName, char *, int, void *); +PER_AMDGPU_FUNCTION(device_get, hipDeviceGet, void *, void *); + +// Context management +PER_AMDGPU_FUNCTION(context_create, hipCtxCreate, void *, int, void *); +PER_AMDGPU_FUNCTION(context_set_current, hipCtxSetCurrent, void *); +PER_AMDGPU_FUNCTION(context_get_current, hipCtxGetCurrent, void **); + +// Stream management +PER_AMDGPU_FUNCTION(stream_create, hipStreamCreate, void **, uint32); + +// Memory management +PER_AMDGPU_FUNCTION(memcpy_host_to_device, + hipMemcpyHtoD, + void *, + void *, + std::size_t); +PER_AMDGPU_FUNCTION(memcpy_device_to_host, + hipMemcpyDtoH, + void *, + void *, + std::size_t); +PER_AMDGPU_FUNCTION(memcpy_device_to_device, + hipMemcpyDtoD, + void *, + void *, + std::size_t); +PER_AMDGPU_FUNCTION(memcpy, + hipMemcpy, + void *, + void *, + std::size_t, + unsigned int); +PER_AMDGPU_FUNCTION(memcpy_async, + hipMemcpyAsync, + void *, + void *, + std::size_t, + unsigned int, + void *); +PER_AMDGPU_FUNCTION(memcpy_host_to_device_async, + hipMemcpyHtoDAsync, + void *, + void *, + std::size_t, + void *); +PER_AMDGPU_FUNCTION(memcpy_device_to_host_async, + hipMemcpyDtoHAsync, + void *, + void *, + std::size_t, + void *); +PER_AMDGPU_FUNCTION(malloc, hipMalloc, void **, std::size_t); +PER_AMDGPU_FUNCTION(malloc_managed, + hipMallocManaged, + void **, + std::size_t, + uint32); +PER_AMDGPU_FUNCTION(memset, hipMemset, void *, uint8, std::size_t); +PER_AMDGPU_FUNCTION(mem_free, hipFree, void *); +PER_AMDGPU_FUNCTION(mem_get_info, hipMemGetInfo, std::size_t *, std::size_t *); +PER_AMDGPU_FUNCTION(mem_get_attribute, + hipPointerGetAttribute, + void *, + uint32, + void *); +PER_AMDGPU_FUNCTION(mem_get_attributes, + hipPointerGetAttributes, + void *, + void *); + +// Module and kernels +PER_AMDGPU_FUNCTION(module_get_function, + hipModuleGetFunction, + void **, + void *, + const char *); +PER_AMDGPU_FUNCTION(module_load_data, hipModuleLoadData, void **, const void *); +PER_AMDGPU_FUNCTION(launch_kernel, + hipModuleLaunchKernel, + void *, + uint32, + uint32, + uint32, + uint32, + uint32, + uint32, + uint32, + void *, + void **, + void **); +PER_AMDGPU_FUNCTION(kernel_get_attribute, + hipFuncGetAttribute, + int *, + uint32, + void *); +PER_AMDGPU_FUNCTION(kernel_get_occupancy, + hipOccupancyMaxActiveBlocksPerMultiprocessor, + int *, + void *, + int, + size_t); + +// Stream management +PER_AMDGPU_FUNCTION(stream_synchronize, hipStreamSynchronize, void *); + +// Event management +PER_AMDGPU_FUNCTION(event_create, hipEventCreateWithFlags, void **, uint32); +PER_AMDGPU_FUNCTION(event_destroy, hipEventDestroy, void *); +PER_AMDGPU_FUNCTION(event_record, hipEventRecord, void *, void *); +PER_AMDGPU_FUNCTION(event_synchronize, hipEventSynchronize, void *); +PER_AMDGPU_FUNCTION(event_elapsed_time, + hipEventElapsedTime, + float *, + void *, + void *); diff --git a/taichi/rhi/cpu/cpu_device.h b/taichi/rhi/cpu/cpu_device.h index 840e58b47a3a4..1f44f603bd4a8 100644 --- a/taichi/rhi/cpu/cpu_device.h +++ b/taichi/rhi/cpu/cpu_device.h @@ -11,33 +11,10 @@ namespace taichi::lang { namespace cpu { -class CpuResourceBinder : public ResourceBinder { - public: - ~CpuResourceBinder() override { - } - - void rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override{TI_NOT_IMPLEMENTED}; - void rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) override{TI_NOT_IMPLEMENTED}; - - void buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override{TI_NOT_IMPLEMENTED}; - void buffer(uint32_t set, uint32_t binding, DeviceAllocation alloc) override{ - TI_NOT_IMPLEMENTED}; -}; - class CpuPipeline : public Pipeline { public: ~CpuPipeline() override { } - - ResourceBinder *resource_binder() override{TI_NOT_IMPLEMENTED}; }; class CpuCommandList : public CommandList { @@ -46,7 +23,11 @@ class CpuCommandList : public CommandList { } void bind_pipeline(Pipeline *p) override{TI_NOT_IMPLEMENTED}; - void bind_resources(ResourceBinder *binder) override{TI_NOT_IMPLEMENTED}; + RhiResult bind_shader_resources(ShaderResourceSet *res, + int set_index = 0) override{ + TI_NOT_IMPLEMENTED}; + RhiResult bind_raster_resources(RasterResources *res) override{ + TI_NOT_IMPLEMENTED}; void buffer_barrier(DevicePtr ptr, size_t size) override{TI_NOT_IMPLEMENTED}; void buffer_barrier(DeviceAllocation alloc) override{TI_NOT_IMPLEMENTED}; void memory_barrier() override{TI_NOT_IMPLEMENTED}; @@ -91,6 +72,8 @@ class CpuDevice : public LlvmDevice { const LlvmRuntimeAllocParams ¶ms) override; void dealloc_memory(DeviceAllocation handle) override; + ShaderResourceSet *create_resource_set() override{TI_NOT_IMPLEMENTED}; + std::unique_ptr create_pipeline( const PipelineSourceDesc &src, std::string name = "Pipeline") override{TI_NOT_IMPLEMENTED}; diff --git a/taichi/rhi/cuda/cuda_device.h b/taichi/rhi/cuda/cuda_device.h index f230594e6a875..43636b1feec94 100644 --- a/taichi/rhi/cuda/cuda_device.h +++ b/taichi/rhi/cuda/cuda_device.h @@ -11,33 +11,10 @@ namespace taichi::lang { namespace cuda { -class CudaResourceBinder : public ResourceBinder { - public: - ~CudaResourceBinder() override { - } - - void rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override{TI_NOT_IMPLEMENTED}; - void rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) override{TI_NOT_IMPLEMENTED}; - - void buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override{TI_NOT_IMPLEMENTED}; - void buffer(uint32_t set, uint32_t binding, DeviceAllocation alloc) override{ - TI_NOT_IMPLEMENTED}; -}; - class CudaPipeline : public Pipeline { public: ~CudaPipeline() override { } - - ResourceBinder *resource_binder() override{TI_NOT_IMPLEMENTED}; }; class CudaCommandList : public CommandList { @@ -46,7 +23,10 @@ class CudaCommandList : public CommandList { } void bind_pipeline(Pipeline *p) override{TI_NOT_IMPLEMENTED}; - void bind_resources(ResourceBinder *binder) override{TI_NOT_IMPLEMENTED}; + RhiResult bind_shader_resources(ShaderResourceSet *res, + int set_index = 0) final{TI_NOT_IMPLEMENTED}; + RhiResult bind_raster_resources(RasterResources *res) final{ + TI_NOT_IMPLEMENTED}; void buffer_barrier(DevicePtr ptr, size_t size) override{TI_NOT_IMPLEMENTED}; void buffer_barrier(DeviceAllocation alloc) override{TI_NOT_IMPLEMENTED}; void memory_barrier() override{TI_NOT_IMPLEMENTED}; @@ -104,6 +84,8 @@ class CudaDevice : public LlvmDevice { const LlvmRuntimeAllocParams ¶ms) override; void dealloc_memory(DeviceAllocation handle) override; + ShaderResourceSet *create_resource_set() final{TI_NOT_IMPLEMENTED}; + std::unique_ptr create_pipeline( const PipelineSourceDesc &src, std::string name = "Pipeline") override{TI_NOT_IMPLEMENTED}; diff --git a/taichi/rhi/device.h b/taichi/rhi/device.h index 299a01510925f..a2b59ba970d85 100644 --- a/taichi/rhi/device.h +++ b/taichi/rhi/device.h @@ -51,7 +51,6 @@ enum class BlendFactor : uint32_t { class Device; struct DeviceAllocation; struct DevicePtr; -struct LLVMRuntime; // TODO: Figure out how to support images. Temporary solutions is to have all // opque types such as images work as an allocation @@ -100,52 +99,93 @@ constexpr DevicePtr kDeviceNullPtr{}; // TODO: fill this with the required options struct ImageSamplerConfig {}; -class ResourceBinder { +// A set of shader resources (that is bound at once) +class TI_DLL_EXPORT ShaderResourceSet { public: - virtual ~ResourceBinder() { - } - - // In Vulkan this is called Storage Buffer (shader can store) - virtual void rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) = 0; - virtual void rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) = 0; - - // In Vulkan this is called Uniform Buffer (shader can only load) - virtual void buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) = 0; - virtual void buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) = 0; - - virtual void image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - ImageSamplerConfig sampler_config) { - TI_NOT_IMPLEMENTED + virtual ~ShaderResourceSet() = default; + + /** + * Bind a RW subregion of a buffer resource (StorgeBuffer / SSBO) + * @params[in] binding The binding index of the resource + * @params[in] ptr The Device Pointer that is going to be bound + * @params[in] size The size of the bound region of the buffer + */ + virtual ShaderResourceSet &rw_buffer(uint32_t binding, + DevicePtr ptr, + size_t size) = 0; + + /** + * Bind an entire RW buffer resource (StorgeBuffer / SSBO) + * @params[in] binding The binding index of the resource + * @params[in] alloc The Device Allocation that is going to be bound + */ + virtual ShaderResourceSet &rw_buffer(uint32_t binding, + DeviceAllocation alloc) = 0; + + /** + * Bind a read-only subregion of a buffer resource (Constants / UBO) + * @params[in] binding The binding index of the resource + * @params[in] ptr The Device Pointer that is going to be bound + * @params[in] size The size of the bound region of the buffer + */ + virtual ShaderResourceSet &buffer(uint32_t binding, + DevicePtr ptr, + size_t size) = 0; + + /** + * Bind an entire read-only buffer resource (Constants / UBO) + * @params[in] binding The binding index of the resource + * @params[in] alloc The Device Allocation that is going to be bound + */ + virtual ShaderResourceSet &buffer(uint32_t binding, + DeviceAllocation alloc) = 0; + + /** + * Bind a read-only image resource (SRV / Texture) + * @params[in] binding The binding index of the resource + * @params[in] alloc The Device Allocation that is going to be bound + * @params[in] sampler_config The texture sampling configuration + */ + virtual ShaderResourceSet &image(uint32_t binding, + DeviceAllocation alloc, + ImageSamplerConfig sampler_config) { + TI_NOT_IMPLEMENTED; } - virtual void rw_image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - int lod) { + /** + * Bind a RW image resource (UAV / Storage Image) + * @params binding The binding index of the resource + * @params alloc The Device Allocation that is going to be bound + */ + virtual ShaderResourceSet &rw_image(uint32_t binding, + DeviceAllocation alloc, + int lod) { TI_NOT_IMPLEMENTED } +}; - // Set vertex buffer (not implemented in compute only device) - virtual void vertex_buffer(DevicePtr ptr, uint32_t binding = 0) { +// A set of states / resources for rasterization +class TI_DLL_EXPORT RasterResources { + public: + virtual ~RasterResources() = default; + + /** + * Set a vertex buffer for the rasterization + * @params ptr The Device Pointer to the vertices data + * @params binding The binding index of the vertex buffer + */ + virtual RasterResources &vertex_buffer(DevicePtr ptr, uint32_t binding = 0) { TI_NOT_IMPLEMENTED } - // Set index buffer (not implemented in compute only device) - // index_width = 4 -> uint32 index - // index_width = 2 -> uint16 index - virtual void index_buffer(DevicePtr ptr, size_t index_width) { + /** + * Set an index buffer for the rasterization + * @params ptr The Device Pointer to the vertices data + * @params index_width The index data width (in bits). + * index_width = 32 -> uint32 index + * index_width = 16 -> uint16 index + */ + virtual RasterResources &index_buffer(DevicePtr ptr, size_t index_width) { TI_NOT_IMPLEMENTED } }; @@ -187,12 +227,10 @@ enum class BufferFormat : uint32_t { #undef PER_BUFFER_FORMAT }; -class Pipeline { +class TI_DLL_EXPORT Pipeline { public: virtual ~Pipeline() { } - - virtual ResourceBinder *resource_binder() = 0; }; enum class ImageDimension { @@ -232,13 +270,48 @@ struct ImageCopyParams { uint32_t depth{1}; }; -class CommandList { +class TI_DLL_EXPORT CommandList { public: virtual ~CommandList() { } + /** + * Bind a pipeline to the command list. + * Doing so resets all bound resources. + * @params[in] pipeline The pipeline to be bound + */ virtual void bind_pipeline(Pipeline *p) = 0; - virtual void bind_resources(ResourceBinder *binder) = 0; + + /** + * Bind a ShaderResourceSet to a set index. + * - If the set index is already bound, the previous binding will be + * overwritten. + * - A set index can only be bound with a single ShaderResourceSet. + * - If the input set is empty, this command is a no-op. + * @params[in] res The ShaderResourceSet to be bound. + * @params[in] set_index The index the resources will be bound to. + * @return The binding result code + * `success` If the binding succeded + * `invalid_usage` If `res` is incompatible with current pipeline + * `not_supported` If some bindings are not supported by the backend + * `out_of_memory` If binding failed due to OOM conditions + * `error` If binding failed due to other reasons + */ + virtual RhiResult bind_shader_resources(ShaderResourceSet *res, + int set_index = 0) = 0; + + /** + * Bind RasterResources to the command list. + * - If the input resource is empty, this command is a no-op. + * @params res The RasterResources to be bound. + * @return The binding result code + * `success` If the binding succeded + * `invalid_usage` If `res` is incompatible with current pipeline + * `not_supported` If some bindings are not supported by the backend + * `error` If binding failed due to other reasons + */ + virtual RhiResult bind_raster_resources(RasterResources *res) = 0; + virtual void buffer_barrier(DevicePtr ptr, size_t size) = 0; virtual void buffer_barrier(DeviceAllocation alloc) = 0; virtual void memory_barrier() = 0; @@ -348,7 +421,7 @@ enum class AllocUsage : int { MAKE_ENUM_FLAGS(AllocUsage) -class StreamSemaphoreObject { +class TI_DLL_EXPORT StreamSemaphoreObject { public: virtual ~StreamSemaphoreObject() { } @@ -356,7 +429,7 @@ class StreamSemaphoreObject { using StreamSemaphore = std::shared_ptr; -class Stream { +class TI_DLL_EXPORT Stream { public: virtual ~Stream() { } @@ -376,7 +449,7 @@ class Stream { } }; -class Device { +class TI_DLL_EXPORT Device { DeviceCapabilityConfig caps_{}; public: @@ -423,6 +496,20 @@ class Device { // Wait for all tasks to complete (task from all streams) virtual void wait_idle() = 0; + /** + * Create a new shader resource set + * @return The new shader resource set pointer + */ + virtual ShaderResourceSet *create_resource_set() = 0; + + /** + * Create a new shader resource set (wrapped in unique ptr) + * @return The new shader resource set unique pointer + */ + inline std::unique_ptr create_resource_set_unique() { + return std::unique_ptr(this->create_resource_set()); + } + /** * Map a range within a DeviceAllocation memory into host address space. * @@ -501,7 +588,7 @@ class Device { } }; -class Surface { +class TI_DLL_EXPORT Surface { public: virtual ~Surface() { } @@ -605,6 +692,20 @@ class TI_DLL_EXPORT GraphicsDevice : public Device { virtual Stream *get_graphics_stream() = 0; + /** + * Create a new raster resources set + * @return The new RasterResources pointer + */ + virtual RasterResources *create_raster_resources() = 0; + + /** + * Create a new raster resources set (wrapped in unique ptr) + * @return The new RasterResources unique pointer + */ + inline std::unique_ptr create_raster_resources_unique() { + return std::unique_ptr(this->create_raster_resources()); + } + virtual std::unique_ptr create_surface( const SurfaceConfig &config) = 0; // You are not expected to call this directly. If you want to use this image diff --git a/taichi/rhi/dx/dx_device.cpp b/taichi/rhi/dx/dx_device.cpp index c942adf95640f..47cbebf26eed2 100644 --- a/taichi/rhi/dx/dx_device.cpp +++ b/taichi/rhi/dx/dx_device.cpp @@ -25,59 +25,51 @@ void check_dx_error(HRESULT hr, const char *msg) { } } -void Dx11ResourceBinder::rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) { +ShaderResourceSet &Dx11ResourceSet::rw_buffer(uint32_t binding, + DevicePtr ptr, + size_t size) { TI_NOT_IMPLEMENTED; + return *this; } -void Dx11ResourceBinder::rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) { +ShaderResourceSet &Dx11ResourceSet::rw_buffer(uint32_t binding, + DeviceAllocation alloc) { uav_binding_to_alloc_id_[binding] = alloc.alloc_id; + return *this; } -void Dx11ResourceBinder::buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) { +ShaderResourceSet &Dx11ResourceSet::buffer(uint32_t binding, + DevicePtr ptr, + size_t size) { TI_NOT_IMPLEMENTED; + return *this; } -void Dx11ResourceBinder::buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) { +ShaderResourceSet &Dx11ResourceSet::buffer(uint32_t binding, + DeviceAllocation alloc) { // args_t now use constant buffers. // Example: // cbuffer args_t : register(b0) // { ... } cb_binding_to_alloc_id_[binding] = alloc.alloc_id; + return *this; } -void Dx11ResourceBinder::image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - ImageSamplerConfig sampler_config) { +ShaderResourceSet &Dx11ResourceSet::image(uint32_t binding, + DeviceAllocation alloc, + ImageSamplerConfig sampler_config) { TI_NOT_IMPLEMENTED; + return *this; } -void Dx11ResourceBinder::rw_image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - int lod) { +ShaderResourceSet &Dx11ResourceSet::rw_image(uint32_t binding, + DeviceAllocation alloc, + int lod) { TI_NOT_IMPLEMENTED; + return *this; } -void Dx11ResourceBinder::vertex_buffer(DevicePtr ptr, uint32_t binding) { - TI_NOT_IMPLEMENTED; -} - -void Dx11ResourceBinder::index_buffer(DevicePtr ptr, size_t index_width) { - TI_NOT_IMPLEMENTED; -} - -Dx11ResourceBinder::~Dx11ResourceBinder() { +Dx11ResourceSet::~Dx11ResourceSet() { } Dx11CommandList::Dx11CommandList(Dx11Device *ti_device) : device_(ti_device) { @@ -102,11 +94,16 @@ void Dx11CommandList::bind_pipeline(Pipeline *p) { d3d11_deferred_context_->CSSetShader(pipeline->get_program(), nullptr, 0); } -void Dx11CommandList::bind_resources(ResourceBinder *binder_) { - Dx11ResourceBinder *binder = static_cast(binder_); +RhiResult Dx11CommandList::bind_shader_resources(ShaderResourceSet *res, + int set_index) { + Dx11ResourceSet *set = static_cast(res); + if (set_index > 0) { + // TODO: Add remapping? + return RhiResult::not_supported; + } // UAV - for (auto &[binding, alloc_id] : binder->uav_binding_to_alloc_id()) { + for (auto &[binding, alloc_id] : set->uav_binding_to_alloc_id()) { ID3D11UnorderedAccessView *uav = device_->alloc_id_to_uav(d3d11_deferred_context_, alloc_id); d3d11_deferred_context_->CSSetUnorderedAccessViews(binding, 1, &uav, @@ -114,7 +111,7 @@ void Dx11CommandList::bind_resources(ResourceBinder *binder_) { } // CBV - for (auto &[binding, alloc_id] : binder->cb_binding_to_alloc_id()) { + for (auto &[binding, alloc_id] : set->cb_binding_to_alloc_id()) { auto cb_buffer = device_->alloc_id_to_cb_buffer(d3d11_deferred_context_, alloc_id); @@ -122,6 +119,12 @@ void Dx11CommandList::bind_resources(ResourceBinder *binder_) { cb_slot_watermark_ = std::max(cb_slot_watermark_, int(binding)); } + + return RhiResult::success; +} + +RhiResult Dx11CommandList::bind_raster_resources(RasterResources *res) { + TI_NOT_IMPLEMENTED; } void Dx11CommandList::buffer_barrier(DevicePtr ptr, size_t size) { @@ -946,10 +949,6 @@ Dx11Pipeline::Dx11Pipeline(const PipelineSourceDesc &desc, Dx11Pipeline::~Dx11Pipeline() { } -ResourceBinder *Dx11Pipeline::resource_binder() { - return &binder_; -} - } // namespace directx11 } // namespace taichi::lang diff --git a/taichi/rhi/dx/dx_device.h b/taichi/rhi/dx/dx_device.h index 0f20f95a3427b..4779d3147a700 100644 --- a/taichi/rhi/dx/dx_device.h +++ b/taichi/rhi/dx/dx_device.h @@ -19,37 +19,23 @@ constexpr bool kD3d11ForceRef = false; void check_dx_error(HRESULT hr, const char *msg); -class Dx11ResourceBinder : public ResourceBinder { +class Dx11ResourceSet : public ShaderResourceSet { public: - ~Dx11ResourceBinder() override; - void rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override; - void rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) override; - void buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override; - void buffer(uint32_t set, uint32_t binding, DeviceAllocation alloc) override; - void image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - ImageSamplerConfig sampler_config) override; - void rw_image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - int lod) override; - - // Set vertex buffer (not implemented in compute only device) - void vertex_buffer(DevicePtr ptr, uint32_t binding = 0) override; - - // Set index buffer (not implemented in compute only device) - // index_width = 4 -> uint32 index - // index_width = 2 -> uint16 index - void index_buffer(DevicePtr ptr, size_t index_width) override; + Dx11ResourceSet() = default; + ~Dx11ResourceSet() override; + + ShaderResourceSet &rw_buffer(uint32_t binding, + DevicePtr ptr, + size_t size) final; + ShaderResourceSet &rw_buffer(uint32_t binding, DeviceAllocation alloc) final; + ShaderResourceSet &buffer(uint32_t binding, DevicePtr ptr, size_t size) final; + ShaderResourceSet &buffer(uint32_t binding, DeviceAllocation alloc) final; + ShaderResourceSet &image(uint32_t binding, + DeviceAllocation alloc, + ImageSamplerConfig sampler_config) final; + ShaderResourceSet &rw_image(uint32_t binding, + DeviceAllocation alloc, + int lod) final; const std::unordered_map &uav_binding_to_alloc_id() { return uav_binding_to_alloc_id_; @@ -64,6 +50,20 @@ class Dx11ResourceBinder : public ResourceBinder { std::unordered_map cb_binding_to_alloc_id_; }; +class Dx11RasterResources : public RasterResources { + ~Dx11RasterResources() override = default; + + RasterResources &vertex_buffer(DevicePtr ptr, uint32_t binding = 0) final { + TI_NOT_IMPLEMENTED; + return *this; + } + + RasterResources &index_buffer(DevicePtr ptr, size_t index_width) final { + TI_NOT_IMPLEMENTED; + return *this; + } +}; + class Dx11Device; class Dx11Pipeline : public Pipeline { @@ -72,7 +72,7 @@ class Dx11Pipeline : public Pipeline { const std::string &name, Dx11Device *device); ~Dx11Pipeline() override; - ResourceBinder *resource_binder() override; + ID3D11ComputeShader *get_program() { return compute_shader_; } @@ -86,7 +86,6 @@ class Dx11Pipeline : public Pipeline { Dx11Device *device_{nullptr}; ID3D11ComputeShader *compute_shader_{nullptr}; - Dx11ResourceBinder binder_; std::string name_; }; @@ -114,7 +113,9 @@ class Dx11CommandList : public CommandList { ~Dx11CommandList() override; void bind_pipeline(Pipeline *p) override; - void bind_resources(ResourceBinder *binder) override; + RhiResult bind_shader_resources(ShaderResourceSet *res, + int set_index = 0) final; + RhiResult bind_raster_resources(RasterResources *res) final; void buffer_barrier(DevicePtr ptr, size_t size) override; void buffer_barrier(DeviceAllocation alloc) override; void memory_barrier() override; @@ -174,6 +175,15 @@ class Dx11Device : public GraphicsDevice { DeviceAllocation allocate_memory(const AllocParams ¶ms) override; void dealloc_memory(DeviceAllocation handle) override; + + ShaderResourceSet *create_resource_set() final { + return new Dx11ResourceSet; + } + + RasterResources *create_raster_resources() final { + return new Dx11RasterResources; + } + std::unique_ptr create_pipeline( const PipelineSourceDesc &src, std::string name = "Pipeline") override; diff --git a/taichi/rhi/impl_support.h b/taichi/rhi/impl_support.h index 38d66ef824c07..4e6729d994fd8 100644 --- a/taichi/rhi/impl_support.h +++ b/taichi/rhi/impl_support.h @@ -148,5 +148,12 @@ class SyncedPtrStableObjectList { std::vector free_nodes_; }; +// A helper to combine hash +template +inline void hash_combine(std::size_t &seed, const T &v) { + std::hash hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + } // namespace rhi_impl } // namespace taichi::lang diff --git a/taichi/rhi/interop/vulkan_cpu_interop.cpp b/taichi/rhi/interop/vulkan_cpu_interop.cpp index e8e4656df5bed..2c33af0a95f4d 100644 --- a/taichi/rhi/interop/vulkan_cpu_interop.cpp +++ b/taichi/rhi/interop/vulkan_cpu_interop.cpp @@ -44,7 +44,7 @@ void memcpy_cpu_to_vulkan_via_staging(DevicePtr dst, CpuDevice::AllocInfo src_alloc_info = cpu_dev->get_alloc_info(src_alloc); void *dst_ptr{nullptr}; - TI_ASSERT(vk_dev->map_range(dst, size, &dst_ptr) == RhiResult::success); + TI_ASSERT(vk_dev->map_range(staging, size, &dst_ptr) == RhiResult::success); void *src_ptr = (uint8_t *)src_alloc_info.ptr + src.offset; memcpy(dst_ptr, src_ptr, size); diff --git a/taichi/rhi/metal/device.cpp b/taichi/rhi/metal/device.cpp index 7cdd6b5e215c5..77f406f03e8ac 100644 --- a/taichi/rhi/metal/device.cpp +++ b/taichi/rhi/metal/device.cpp @@ -11,7 +11,7 @@ namespace metal { #ifdef TI_PLATFORM_OSX namespace { -class ResourceBinderImpl : public ResourceBinder { +class ShaderResourceSetImpl : public ShaderResourceSet { public: struct Binding { DeviceAllocationId alloc_id{0}; @@ -22,31 +22,32 @@ class ResourceBinderImpl : public ResourceBinder { }; using BindingMap = std::unordered_map; - explicit ResourceBinderImpl(const Device *dev) : dev_(dev) { + explicit ShaderResourceSetImpl(const Device *dev) : dev_(dev) { } // RW buffers - void rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override { - bind_buffer(set, binding, ptr, ptr.offset, /*is_constant=*/false); + ShaderResourceSet &rw_buffer(uint32_t binding, + DevicePtr ptr, + size_t size) override { + bind_buffer(binding, ptr, ptr.offset, /*is_constant=*/false); + return *this; } - void rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) override { - bind_buffer(set, binding, alloc, /*offset=*/0, /*is_constant=*/false); + ShaderResourceSet &rw_buffer(uint32_t binding, + DeviceAllocation alloc) override { + bind_buffer(binding, alloc, /*offset=*/0, /*is_constant=*/false); + return *this; } // Constant buffers - void buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override { - bind_buffer(set, binding, ptr, ptr.offset, /*is_constant=*/false); + ShaderResourceSet &buffer(uint32_t binding, + DevicePtr ptr, + size_t size) override { + bind_buffer(binding, ptr, ptr.offset, /*is_constant=*/false); + return *this; } - void buffer(uint32_t set, uint32_t binding, DeviceAllocation alloc) override { - bind_buffer(set, binding, alloc, /*offset=*/0, /*is_constant=*/true); + ShaderResourceSet &buffer(uint32_t binding, DeviceAllocation alloc) override { + bind_buffer(binding, alloc, /*offset=*/0, /*is_constant=*/true); + return *this; } const BindingMap &binding_map() const { @@ -54,12 +55,10 @@ class ResourceBinderImpl : public ResourceBinder { } private: - void bind_buffer(uint32_t set, - uint32_t binding, + void bind_buffer(uint32_t binding, const DeviceAllocation &alloc, uint64_t offset, bool is_constant) { - TI_ASSERT(set == 0); TI_ASSERT(alloc.device == dev_); binding_map_[binding] = {alloc.alloc_id, offset, is_constant}; } @@ -74,11 +73,6 @@ class PipelineImpl : public Pipeline { : pipeline_state_(std::move(pipeline)) { } - ResourceBinder *resource_binder() override { - // TODO: Hmm, why do we need this interface? - return nullptr; - } - MTLComputePipelineState *mtl_pipeline_state() { return pipeline_state_.get(); } @@ -91,7 +85,7 @@ class CommandListImpl : public CommandList { private: struct ComputeEncoderBuilder { MTLComputePipelineState *pipeline{nullptr}; - ResourceBinderImpl::BindingMap binding_map; + ShaderResourceSetImpl::BindingMap binding_map; }; public: @@ -113,9 +107,15 @@ class CommandListImpl : public CommandList { static_cast(p)->mtl_pipeline_state(); } - void bind_resources(ResourceBinder *binder) override { + RhiResult bind_shader_resources(ShaderResourceSet *res, + int set_index = 0) final { get_or_make_compute_builder()->binding_map = - static_cast(binder)->binding_map(); + static_cast(res)->binding_map(); + return RhiResult::success; + } + + RhiResult bind_raster_resources(RasterResources *res) final { + TI_NOT_IMPLEMENTED; } void buffer_barrier(DevicePtr ptr, size_t size) override { @@ -322,6 +322,10 @@ class DeviceImpl : public Device, public AllocToMTLBufferMapper { return std::make_unique(std::move(pipeline)); } + ShaderResourceSet *create_resource_set() final { + return new ShaderResourceSetImpl(this); + } + RhiResult map_range(DevicePtr ptr, uint64_t size, void **mapped_ptr) final { auto *mem = find(ptr).mem; if (!mem) { diff --git a/taichi/rhi/opengl/opengl_device.cpp b/taichi/rhi/opengl/opengl_device.cpp index e7c8d71c2f6b7..759edb1637f43 100644 --- a/taichi/rhi/opengl/opengl_device.cpp +++ b/taichi/rhi/opengl/opengl_device.cpp @@ -182,62 +182,42 @@ void check_opengl_error(const std::string &msg) { } } -GLResourceBinder::~GLResourceBinder() { +GLResourceSet::~GLResourceSet() { } -void GLResourceBinder::rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) { - // FIXME: Implement ranged bind - TI_NOT_IMPLEMENTED; +GLResourceSet &GLResourceSet::rw_buffer(uint32_t binding, + DevicePtr ptr, + size_t size) { + ssbo_binding_map_[binding] = {GLuint(ptr.alloc_id), ptr.offset, size}; + return *this; } -void GLResourceBinder::rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) { - TI_ASSERT_INFO(set == 0, "OpenGL only supports set = 0, requested set = {}", - set); - ssbo_binding_map_[binding] = alloc.alloc_id; +GLResourceSet &GLResourceSet::rw_buffer(uint32_t binding, + DeviceAllocation alloc) { + return rw_buffer(binding, alloc.get_ptr(0), -1); } -void GLResourceBinder::buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) { - // FIXME: Implement ranged bind - TI_NOT_IMPLEMENTED; +GLResourceSet &GLResourceSet::buffer(uint32_t binding, + DevicePtr ptr, + size_t size) { + ubo_binding_map_[binding] = {GLuint(ptr.alloc_id), ptr.offset, size}; + return *this; } -void GLResourceBinder::buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) { - TI_ASSERT_INFO(set == 0, "OpenGL only supports set = 0, requested set = {}", - set); - ubo_binding_map_[binding] = alloc.alloc_id; +GLResourceSet &GLResourceSet::buffer(uint32_t binding, DeviceAllocation alloc) { + return buffer(binding, alloc.get_ptr(0), -1); } -void GLResourceBinder::image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - ImageSamplerConfig sampler_config) { - TI_ASSERT_INFO(set == 0, "OpenGL only supports set = 0, requested set = {}", - set); +GLResourceSet &GLResourceSet::image(uint32_t binding, + DeviceAllocation alloc, + ImageSamplerConfig sampler_config) { texture_binding_map_[binding] = alloc.alloc_id; + return *this; } -void GLResourceBinder::rw_image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - int lod) { - TI_NOT_IMPLEMENTED; -} - -void GLResourceBinder::vertex_buffer(DevicePtr ptr, uint32_t binding) { - TI_NOT_IMPLEMENTED; -} - -void GLResourceBinder::index_buffer(DevicePtr ptr, size_t index_width) { +GLResourceSet &GLResourceSet::rw_image(uint32_t binding, + DeviceAllocation alloc, + int lod) { TI_NOT_IMPLEMENTED; } @@ -322,10 +302,6 @@ GLPipeline::~GLPipeline() { check_opengl_error("glDeleteShader"); } -ResourceBinder *GLPipeline::resource_binder() { - return &binder_; -} - GLCommandList::~GLCommandList() { } @@ -336,33 +312,39 @@ void GLCommandList::bind_pipeline(Pipeline *p) { recorded_commands_.push_back(std::move(cmd)); } -void GLCommandList::bind_resources(ResourceBinder *_binder) { - GLResourceBinder *binder = static_cast(_binder); - for (auto &[binding, buffer] : binder->ssbo_binding_map()) { +RhiResult GLCommandList::bind_shader_resources(ShaderResourceSet *res, + int set_index) { + GLResourceSet *set = static_cast(res); + for (auto &[binding, buffer] : set->ssbo_binding_map()) { auto cmd = std::make_unique(); - cmd->buffer = buffer; + cmd->buffer = buffer.buffer; + cmd->offset = buffer.offset; + cmd->size = buffer.size; cmd->index = binding; recorded_commands_.push_back(std::move(cmd)); } - for (auto &[binding, buffer] : binder->ubo_binding_map()) { + for (auto &[binding, buffer] : set->ubo_binding_map()) { auto cmd = std::make_unique(); - cmd->buffer = buffer; + cmd->buffer = buffer.buffer; + cmd->offset = buffer.offset; + cmd->size = buffer.size; cmd->index = binding; cmd->target = GL_UNIFORM_BUFFER; recorded_commands_.push_back(std::move(cmd)); } - for (auto &[binding, texture] : binder->texture_binding_map()) { + for (auto &[binding, texture] : set->texture_binding_map()) { auto cmd = std::make_unique(); cmd->texture = texture; cmd->index = binding; cmd->target = device_->get_image_gl_dims(texture); recorded_commands_.push_back(std::move(cmd)); } + + return RhiResult::success; } -template -std::initializer_list make_init_list(std::initializer_list &&l) { - return l; +RhiResult GLCommandList::bind_raster_resources(RasterResources *res) { + TI_NOT_IMPLEMENTED; } void GLCommandList::buffer_barrier(DevicePtr ptr, size_t size) { @@ -734,7 +716,6 @@ void GLDevice::image_to_buffer(DevicePtr dst_buf, } GLSurface::~GLSurface() { - TI_NOT_IMPLEMENTED; } StreamSemaphore GLSurface::acquire_next_image() { @@ -772,9 +753,14 @@ void GLCommandList::CmdBindPipeline::execute() { } void GLCommandList::CmdBindBufferToIndex::execute() { - check_opengl_error("before"); - glBindBufferBase(target, index, buffer); - check_opengl_error("glBindBufferBase"); + if (size == -1) { + glBindBufferBase(target, index, buffer); + check_opengl_error("glBindBufferBase"); + } else { + glBindBufferRange(target, index, buffer, GLintptr(offset), + GLsizeiptr(size)); + check_opengl_error("glBindBufferRange"); + } } void GLCommandList::CmdBindTextureToIndex::execute() { diff --git a/taichi/rhi/opengl/opengl_device.h b/taichi/rhi/opengl/opengl_device.h index f792e9be0595b..7645aaf1c002f 100644 --- a/taichi/rhi/opengl/opengl_device.h +++ b/taichi/rhi/opengl/opengl_device.h @@ -12,53 +12,37 @@ class GLDevice; void check_opengl_error(const std::string &msg = "OpenGL"); -class GLResourceBinder : public ResourceBinder { +class GLResourceSet : public ShaderResourceSet { public: - ~GLResourceBinder() override; + GLResourceSet() = default; + explicit GLResourceSet(const GLResourceSet &other) = default; - struct Bindings { - // OpenGL has no sets, default set = 0 - uint32_t binding{0}; - GLuint buffer{0}; - GLuint image{0}; + ~GLResourceSet() override; + + GLResourceSet &rw_buffer(uint32_t binding, DevicePtr ptr, size_t size) final; + GLResourceSet &rw_buffer(uint32_t binding, DeviceAllocation alloc) final; + + GLResourceSet &buffer(uint32_t binding, DevicePtr ptr, size_t size) final; + GLResourceSet &buffer(uint32_t binding, DeviceAllocation alloc) final; + + GLResourceSet &image(uint32_t binding, + DeviceAllocation alloc, + ImageSamplerConfig sampler_config) final; + GLResourceSet &rw_image(uint32_t binding, + DeviceAllocation alloc, + int lod) final; + + struct BufferBinding { + GLuint buffer; + size_t offset; + size_t size; }; - void rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override; - void rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) override; - - void buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override; - void buffer(uint32_t set, uint32_t binding, DeviceAllocation alloc) override; - - void image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - ImageSamplerConfig sampler_config) override; - void rw_image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - int lod) override; - - // Set vertex buffer (not implemented in compute only device) - void vertex_buffer(DevicePtr ptr, uint32_t binding = 0) override; - - // Set index buffer (not implemented in compute only device) - // index_width = 4 -> uint32 index - // index_width = 2 -> uint16 index - void index_buffer(DevicePtr ptr, size_t index_width) override; - - const std::unordered_map &ssbo_binding_map() { + const std::unordered_map &ssbo_binding_map() { return ssbo_binding_map_; } - const std::unordered_map &ubo_binding_map() { + const std::unordered_map &ubo_binding_map() { return ubo_binding_map_; } @@ -67,8 +51,8 @@ class GLResourceBinder : public ResourceBinder { } private: - std::unordered_map ssbo_binding_map_; - std::unordered_map ubo_binding_map_; + std::unordered_map ssbo_binding_map_; + std::unordered_map ubo_binding_map_; std::unordered_map texture_binding_map_; }; @@ -77,15 +61,12 @@ class GLPipeline : public Pipeline { GLPipeline(const PipelineSourceDesc &desc, const std::string &name); ~GLPipeline() override; - ResourceBinder *resource_binder() override; - GLuint get_program() { return program_id_; } private: GLuint program_id_; - GLResourceBinder binder_; }; class GLCommandList : public CommandList { @@ -95,7 +76,9 @@ class GLCommandList : public CommandList { ~GLCommandList() override; void bind_pipeline(Pipeline *p) override; - void bind_resources(ResourceBinder *binder) override; + RhiResult bind_shader_resources(ShaderResourceSet *res, + int set_index = 0) final; + RhiResult bind_raster_resources(RasterResources *res) final; void buffer_barrier(DevicePtr ptr, size_t size) override; void buffer_barrier(DeviceAllocation alloc) override; void memory_barrier() override; @@ -151,6 +134,8 @@ class GLCommandList : public CommandList { struct CmdBindBufferToIndex : public Cmd { GLuint buffer{0}; GLuint index{0}; + GLuint offset{0}; + GLuint size{0}; GLenum target{GL_SHADER_STORAGE_BUFFER}; void execute() override; }; @@ -250,6 +235,14 @@ class GLDevice : public GraphicsDevice { const PipelineSourceDesc &src, std::string name = "Pipeline") override; + ShaderResourceSet *create_resource_set() final { + return new GLResourceSet; + } + + RasterResources *create_raster_resources() final { + TI_NOT_IMPLEMENTED; + } + // Mapping can fail and will return nullptr RhiResult map_range(DevicePtr ptr, uint64_t size, void **mapped_ptr) final; RhiResult map(DeviceAllocation alloc, void **mapped_ptr) final; diff --git a/taichi/rhi/vulkan/vulkan_api.cpp b/taichi/rhi/vulkan/vulkan_api.cpp index 949fd0618100d..ab40e35757079 100644 --- a/taichi/rhi/vulkan/vulkan_api.cpp +++ b/taichi/rhi/vulkan/vulkan_api.cpp @@ -42,6 +42,10 @@ DeviceObjVkPipeline::~DeviceObjVkPipeline() { vkDestroyPipeline(device, pipeline, nullptr); } +DeviceObjVkSampler::~DeviceObjVkSampler() { + vkDestroySampler(device, sampler, nullptr); +} + DeviceObjVkImage::~DeviceObjVkImage() { if (allocation) { vmaDestroyImage(allocator, image, allocation); @@ -146,7 +150,10 @@ IVkDescriptorPool create_descriptor_pool( obj->device = device; VkResult res = vkCreateDescriptorPool(device, create_info, nullptr, &obj->pool); - BAIL_ON_VK_BAD_RESULT_NO_RETURN(res, "failed to create descriptor pool"); + if (res != VK_SUCCESS) { + // All failure condition listed in spec are OOM + return nullptr; + } return obj; } @@ -245,7 +252,7 @@ IVkPipelineLayout create_pipeline_layout( std::vector layouts; layouts.reserve(set_layouts.size()); - for (auto l : set_layouts) { + for (auto &l : set_layouts) { layouts.push_back(l->layout); } @@ -416,6 +423,17 @@ IVkPipeline create_raytracing_pipeline( return obj; } +IVkSampler create_sampler(VkDevice device, const VkSamplerCreateInfo &info) { + IVkSampler sampler = std::make_shared(); + sampler->device = device; + + BAIL_ON_VK_BAD_RESULT_NO_RETURN( + vkCreateSampler(device, &info, nullptr, &sampler->sampler), + "failed to create texture sampler!"); + + return sampler; +} + IVkImage create_image(VkDevice device, VmaAllocator allocator, VkImageCreateInfo *image_info, diff --git a/taichi/rhi/vulkan/vulkan_api.h b/taichi/rhi/vulkan/vulkan_api.h index a20224aad4e94..39dc3f82f5684 100644 --- a/taichi/rhi/vulkan/vulkan_api.h +++ b/taichi/rhi/vulkan/vulkan_api.h @@ -65,7 +65,7 @@ struct DeviceObjVkDescriptorSet : public DeviceObj { VkDescriptorSet set{VK_NULL_HANDLE}; IVkDescriptorSetLayout ref_layout{nullptr}; IVkDescriptorPool ref_pool{nullptr}; - std::unordered_map ref_binding_objs; + std::vector ref_binding_objs; ~DeviceObjVkDescriptorSet() override; }; using IVkDescriptorSet = std::shared_ptr; @@ -172,6 +172,14 @@ IVkPipeline create_raytracing_pipeline( IVkPipelineCache cache = nullptr, IVkPipeline base_pipeline = nullptr); +// VkSampler +struct DeviceObjVkSampler : public DeviceObj { + VkSampler sampler{VK_NULL_HANDLE}; + ~DeviceObjVkSampler() override; +}; +using IVkSampler = std::shared_ptr; +IVkSampler create_sampler(VkDevice device, const VkSamplerCreateInfo &info); + // VkImage struct DeviceObjVkImage : public DeviceObj { VkImage image{VK_NULL_HANDLE}; diff --git a/taichi/rhi/vulkan/vulkan_device.cpp b/taichi/rhi/vulkan/vulkan_device.cpp index b080ae1a67457..b9ca65e42faf1 100644 --- a/taichi/rhi/vulkan/vulkan_device.cpp +++ b/taichi/rhi/vulkan/vulkan_device.cpp @@ -140,7 +140,9 @@ RhiReturn blend_factor_ti_to_vk(BlendFactor factor) { } VulkanPipeline::VulkanPipeline(const Params ¶ms) - : device_(params.device->vk_device()), name_(params.name) { + : ti_device_(*params.device), + device_(params.device->vk_device()), + name_(params.name) { create_descriptor_set_layout(params); create_shader_stages(params); create_pipeline_layout(); @@ -157,7 +159,9 @@ VulkanPipeline::VulkanPipeline( const RasterParams &raster_params, const std::vector &vertex_inputs, const std::vector &vertex_attrs) - : device_(params.device->vk_device()), name_(params.name) { + : ti_device_(*params.device), + device_(params.device->vk_device()), + name_(params.name) { this->graphics_pipeline_template_ = std::make_unique(); @@ -218,7 +222,7 @@ vkapi::IVkPipeline VulkanPipeline::graphics_pipeline_dynamic( color_attachment_formats.push_back(color_attachment.first); } - VkPipelineRenderingCreateInfoKHR rendering_info; + VkPipelineRenderingCreateInfoKHR rendering_info{}; rendering_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR; rendering_info.pNext = nullptr; rendering_info.viewMask = 0; @@ -238,8 +242,6 @@ vkapi::IVkPipeline VulkanPipeline::graphics_pipeline_dynamic( } void VulkanPipeline::create_descriptor_set_layout(const Params ¶ms) { - std::unordered_set sets_used; - for (auto &code_view : params.code) { SpvReflectShaderModule module; SpvReflectResult result = @@ -255,31 +257,31 @@ void VulkanPipeline::create_descriptor_set_layout(const Params ¶ms) { RHI_ASSERT(result == SPV_REFLECT_RESULT_SUCCESS); for (SpvReflectDescriptorSet *desc_set : desc_sets) { - uint32_t set = desc_set->set; + uint32_t set_index = desc_set->set; + if (set_templates_.find(set_index) == set_templates_.end()) { + set_templates_.insert({set_index, VulkanResourceSet(&ti_device_)}); + } + VulkanResourceSet &set = set_templates_.at(set_index); + for (int i = 0; i < desc_set->binding_count; i++) { SpvReflectDescriptorBinding *desc_binding = desc_set->bindings[i]; if (desc_binding->descriptor_type == SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_BUFFER) { - resource_binder_.rw_buffer(set, desc_binding->binding, kDeviceNullPtr, - 0); + set.rw_buffer(desc_binding->binding, kDeviceNullPtr, 0); } else if (desc_binding->descriptor_type == SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER) { - resource_binder_.buffer(set, desc_binding->binding, kDeviceNullPtr, - 0); + set.buffer(desc_binding->binding, kDeviceNullPtr, 0); } else if (desc_binding->descriptor_type == SPV_REFLECT_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - resource_binder_.image(set, desc_binding->binding, - kDeviceNullAllocation, {}); + set.image(desc_binding->binding, kDeviceNullAllocation, {}); } else if (desc_binding->descriptor_type == SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_IMAGE) { - resource_binder_.rw_image(set, desc_binding->binding, - kDeviceNullAllocation, {}); + set.rw_image(desc_binding->binding, kDeviceNullAllocation, {}); } else { RHI_LOG_ERROR("Unrecognized binding ignored"); } } - sets_used.insert(set); } // Handle special vertex shaders stuff @@ -335,14 +337,21 @@ void VulkanPipeline::create_descriptor_set_layout(const Params ¶ms) { spvReflectDestroyShaderModule(&module); } - for (uint32_t set : sets_used) { - vkapi::IVkDescriptorSetLayout layout = - params.device->get_desc_set_layout(resource_binder_.get_set(set)); + // A program can have no binding sets at all. + if (set_templates_.size()) { + // We need to verify the set layouts are all continous + uint32_t max_set = 0; + for (auto &[index, layout_template] : set_templates_) { + max_set = std::max(index, max_set); + } + RHI_ASSERT(max_set + 1 == set_templates_.size() && + "Sets must be continous & start with 0"); - set_layouts_.push_back(layout); + set_layouts_.resize(set_templates_.size(), nullptr); + for (auto &[index, layout_template] : set_templates_) { + set_layouts_[index] = ti_device_.get_desc_set_layout(layout_template); + } } - - resource_binder_.lock_layout(); } void VulkanPipeline::create_shader_stages(const Params ¶ms) { @@ -381,7 +390,7 @@ void VulkanPipeline::create_graphics_pipeline( const std::vector &vertex_inputs, const std::vector &vertex_attrs) { // Use dynamic viewport state. These two are just dummies - VkViewport viewport; + VkViewport viewport{}; viewport.width = 1; viewport.height = 1; viewport.x = 0; @@ -389,9 +398,7 @@ void VulkanPipeline::create_graphics_pipeline( viewport.minDepth = 0.0; viewport.maxDepth = 1.0; - VkRect2D scissor; - scissor.offset = {0, 0}; - scissor.extent = {1, 1}; + VkRect2D scissor{/*offset*/ {0, 0}, /*extent*/ {1, 1}}; VkPipelineViewportStateCreateInfo &viewport_state = graphics_pipeline_template_->viewport_state; @@ -580,244 +587,218 @@ void VulkanPipeline::create_graphics_pipeline( pipeline_info.basePipelineHandle = VK_NULL_HANDLE; } -VulkanResourceBinder::VulkanResourceBinder(VkPipelineBindPoint bind_point) - : bind_point_(bind_point) { +VulkanResourceSet::VulkanResourceSet(VulkanDevice *device) : device_(device) { } -VulkanResourceBinder::~VulkanResourceBinder() { - for (auto &set_pair : sets_) { - Set &set = set_pair.second; - for (auto &binding_pair : set.bindings) { - VkSampler sampler = binding_pair.second.sampler; - if (sampler != VK_NULL_HANDLE) { - Device *dev = binding_pair.second.ptr.device; - vkDestroySampler(static_cast(dev)->vk_device(), sampler, - kNoVkAllocCallbacks); - } - } - } +VulkanResourceSet::~VulkanResourceSet() { } -VkSampler create_sampler(ImageSamplerConfig config, VkDevice device) { - VkSampler sampler = VK_NULL_HANDLE; - - // todo: fill these using the information from the ImageSamplerConfig - VkSamplerCreateInfo sampler_info{}; - sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - sampler_info.magFilter = VK_FILTER_LINEAR; - sampler_info.minFilter = VK_FILTER_LINEAR; - sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_info.anisotropyEnable = VK_FALSE; - sampler_info.borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK; - sampler_info.unnormalizedCoordinates = VK_FALSE; - sampler_info.compareEnable = VK_FALSE; - sampler_info.compareOp = VK_COMPARE_OP_ALWAYS; - sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; - - if (vkCreateSampler(device, &sampler_info, nullptr, &sampler) != VK_SUCCESS) { - throw std::runtime_error("failed to create texture sampler!"); - } - return sampler; -} - -#define CHECK_SET_BINDINGS \ - bool set_not_found = (sets_.find(set) == sets_.end()); \ - if (set_not_found) { \ - if (layout_locked_) { \ - return; \ - } else { \ - sets_[set] = {}; \ - } \ - } \ - auto &bindings = sets_.at(set).bindings; \ - if (layout_locked_ && bindings.find(binding) == bindings.end()) { \ - return; \ - } - -void VulkanResourceBinder::rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) { - CHECK_SET_BINDINGS; - - if (layout_locked_) { - RHI_ASSERT(bindings.at(binding).type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); - } else { - if (bindings.find(binding) != bindings.end()) { - RHI_LOG_ERROR("Overriding last binding"); - } - } +ShaderResourceSet &VulkanResourceSet::rw_buffer(uint32_t binding, + DevicePtr ptr, + size_t size) { + dirty_ = true; - Binding new_binding = {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ptr, size}; - bindings[binding] = new_binding; + vkapi::IVkBuffer buffer = + (ptr != kDeviceNullPtr) ? device_->get_vkbuffer(ptr) : nullptr; + bindings_[binding] = {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + Buffer{buffer, ptr.offset, size}}; + return *this; } -void VulkanResourceBinder::rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) { - rw_buffer(set, binding, alloc.get_ptr(0), VK_WHOLE_SIZE); +ShaderResourceSet &VulkanResourceSet::rw_buffer(uint32_t binding, + DeviceAllocation alloc) { + return rw_buffer(binding, alloc.get_ptr(0), VK_WHOLE_SIZE); } -void VulkanResourceBinder::buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) { - CHECK_SET_BINDINGS; - - if (layout_locked_) { - RHI_ASSERT(bindings.at(binding).type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); - } else { - if (bindings.find(binding) != bindings.end()) { - RHI_LOG_ERROR("Overriding last binding"); - } - } +ShaderResourceSet &VulkanResourceSet::buffer(uint32_t binding, + DevicePtr ptr, + size_t size) { + dirty_ = true; - Binding new_binding = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, ptr, size}; - bindings[binding] = new_binding; + vkapi::IVkBuffer buffer = + (ptr != kDeviceNullPtr) ? device_->get_vkbuffer(ptr) : nullptr; + bindings_[binding] = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + Buffer{buffer, ptr.offset, size}}; + return *this; } -void VulkanResourceBinder::buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) { - buffer(set, binding, alloc.get_ptr(0), VK_WHOLE_SIZE); +ShaderResourceSet &VulkanResourceSet::buffer(uint32_t binding, + DeviceAllocation alloc) { + return buffer(binding, alloc.get_ptr(0), VK_WHOLE_SIZE); } -void VulkanResourceBinder::image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - ImageSamplerConfig sampler_config) { - CHECK_SET_BINDINGS - if (layout_locked_) { - RHI_ASSERT(bindings.at(binding).type == - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); - } else { - if (bindings.find(binding) != bindings.end()) { - RHI_LOG_ERROR("Overriding last binding"); - } - } - if (bindings[binding].sampler != VK_NULL_HANDLE) { - Device *dev = bindings[binding].ptr.device; - vkDestroySampler(static_cast(dev)->vk_device(), - bindings[binding].sampler, kNoVkAllocCallbacks); - } - bindings[binding] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - alloc.get_ptr(0), VK_WHOLE_SIZE}; - if (alloc.device) { - VulkanDevice *device = static_cast(alloc.device); - bindings[binding].sampler = - create_sampler(sampler_config, device->vk_device()); - } -} +ShaderResourceSet &VulkanResourceSet::image(uint32_t binding, + DeviceAllocation alloc, + ImageSamplerConfig sampler_config) { + dirty_ = true; -void VulkanResourceBinder::rw_image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - int lod) { - CHECK_SET_BINDINGS - if (layout_locked_) { - RHI_ASSERT(bindings.at(binding).type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE); - } else { - if (bindings.find(binding) != bindings.end()) { - RHI_LOG_ERROR("Overriding last binding"); - } + vkapi::IVkSampler sampler = nullptr; + vkapi::IVkImageView view = nullptr; + + if (alloc != kDeviceNullAllocation) { + VkSamplerCreateInfo sampler_info{}; + sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampler_info.magFilter = VK_FILTER_LINEAR; + sampler_info.minFilter = VK_FILTER_LINEAR; + sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_info.anisotropyEnable = VK_FALSE; + sampler_info.borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK; + sampler_info.unnormalizedCoordinates = VK_FALSE; + sampler_info.compareEnable = VK_FALSE; + sampler_info.compareOp = VK_COMPARE_OP_ALWAYS; + sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + + sampler = vkapi::create_sampler(device_->vk_device(), sampler_info); + view = device_->get_vk_imageview(alloc); } - bindings[binding] = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, alloc.get_ptr(0), - VK_WHOLE_SIZE}; -} -#undef CHECK_SET_BINDINGS + bindings_[binding] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + Texture{view, sampler}}; -void VulkanResourceBinder::vertex_buffer(DevicePtr ptr, uint32_t binding) { - vertex_buffers_[binding] = ptr; + return *this; } -void VulkanResourceBinder::index_buffer(DevicePtr ptr, size_t index_width) { - index_buffer_ = ptr; - if (index_width == 32) { - index_type_ = VK_INDEX_TYPE_UINT32; - } else if (index_width == 16) { - index_type_ = VK_INDEX_TYPE_UINT16; - } else { - RHI_LOG_ERROR("unsupported index width"); - } +ShaderResourceSet &VulkanResourceSet::rw_image(uint32_t binding, + DeviceAllocation alloc, + int lod) { + dirty_ = true; + + vkapi::IVkImageView view = (alloc != kDeviceNullAllocation) + ? device_->get_vk_lod_imageview(alloc, lod) + : nullptr; + + bindings_[binding] = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, Image{view}}; + + return *this; } -void VulkanResourceBinder::write_to_set(uint32_t index, - VulkanDevice &device, - vkapi::IVkDescriptorSet set) { - std::vector buffer_infos; - std::vector image_infos; - std::vector is_image; - std::vector desc_writes; +RhiReturn VulkanResourceSet::finalize() { + if (!dirty_ && set_) { + // If nothing changed directly return the set + return {RhiResult::success, set_}; + } - for (auto &pair : sets_.at(index).bindings) { - uint32_t binding = pair.first; + if (bindings_.size() <= 0) { + // A set can't be empty + return {RhiResult::invalid_usage, nullptr}; + } - if (pair.second.ptr != kDeviceNullPtr) { - VkDescriptorBufferInfo &buffer_info = buffer_infos.emplace_back(); - VkDescriptorImageInfo &image_info = image_infos.emplace_back(); - - if (pair.second.type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || - pair.second.type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) { - auto buffer = device.get_vkbuffer(pair.second.ptr); - buffer_info.buffer = buffer->buffer; - buffer_info.offset = pair.second.ptr.offset; - buffer_info.range = pair.second.size; - is_image.push_back(false); - set->ref_binding_objs[binding] = buffer; - } else if (pair.second.type == - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - auto view = std::get<1>(device.get_vk_image(pair.second.ptr)); - image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - image_info.imageView = view->view; - image_info.sampler = pair.second.sampler; - is_image.push_back(true); - set->ref_binding_objs[binding] = view; - } else if (pair.second.type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { - auto view = - device.get_vk_lod_imageview(pair.second.ptr, pair.second.image_lod); - image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; - image_info.imageView = view->view; - is_image.push_back(true); - set->ref_binding_objs[binding] = view; - } else { - RHI_LOG_ERROR("Ignoring unsupported Descriptor Type"); - } + vkapi::IVkDescriptorSetLayout new_layout = + device_->get_desc_set_layout(*this); + if (new_layout != layout_) { + // Layout changed, reset `set` + set_ = nullptr; + layout_ = new_layout; + } - VkWriteDescriptorSet &write = desc_writes.emplace_back(); - write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - write.pNext = nullptr; - write.dstSet = set->set; - write.dstBinding = binding; - write.dstArrayElement = 0; - write.descriptorCount = 1; - write.descriptorType = pair.second.type; - write.pImageInfo = nullptr; - write.pBufferInfo = nullptr; - write.pTexelBufferView = nullptr; + if (!set_) { + // If set_ is null, create a new one + auto [status, new_set] = device_->alloc_desc_set(layout_); + if (status != RhiResult::success) { + return {status, nullptr}; } + set_ = new_set; } - // Set these pointers later as std::vector resize can relocate the pointers - int i = 0; - for (auto &write : desc_writes) { - if (is_image[i]) { - write.pImageInfo = &image_infos[i]; + std::forward_list buffer_infos; + std::forward_list image_infos; + std::vector desc_writes; + + for (auto &pair : bindings_) { + uint32_t binding = pair.first; + VkDescriptorType type = pair.second.type; + auto &resource = pair.second.res; + + VkWriteDescriptorSet &write = desc_writes.emplace_back(); + write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write.pNext = nullptr; + write.dstSet = set_->set; + write.dstBinding = binding; + write.dstArrayElement = 0; + write.descriptorCount = 1; + write.descriptorType = type; + write.pImageInfo = nullptr; + write.pBufferInfo = nullptr; + write.pTexelBufferView = nullptr; + + if (Buffer *buf = std::get_if(&resource)) { + VkDescriptorBufferInfo &buffer_info = buffer_infos.emplace_front(); + buffer_info.buffer = buf->buffer ? buf->buffer->buffer : VK_NULL_HANDLE; + buffer_info.offset = buf->offset; + buffer_info.range = buf->size; + + write.pBufferInfo = &buffer_info; + if (buf->buffer) { + set_->ref_binding_objs.push_back(buf->buffer); + } + } else if (Image *img = std::get_if(&resource)) { + VkDescriptorImageInfo &image_info = image_infos.emplace_front(); + image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + image_info.imageView = img->view ? img->view->view : VK_NULL_HANDLE; + image_info.sampler = VK_NULL_HANDLE; + + write.pImageInfo = &image_info; + if (img->view) { + set_->ref_binding_objs.push_back(img->view); + } + } else if (Texture *tex = std::get_if(&resource)) { + VkDescriptorImageInfo &image_info = image_infos.emplace_front(); + image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + image_info.imageView = tex->view ? tex->view->view : VK_NULL_HANDLE; + image_info.sampler = + tex->sampler ? tex->sampler->sampler : VK_NULL_HANDLE; + + write.pImageInfo = &image_info; + if (tex->view) { + set_->ref_binding_objs.push_back(tex->view); + } + if (tex->sampler) { + set_->ref_binding_objs.push_back(tex->sampler); + } } else { - write.pBufferInfo = &buffer_infos[i]; + RHI_LOG_ERROR("Ignoring unsupported Descriptor Type"); } - i++; } - vkUpdateDescriptorSets(device.vk_device(), desc_writes.size(), + vkUpdateDescriptorSets(device_->vk_device(), desc_writes.size(), desc_writes.data(), /*descriptorCopyCount=*/0, /*pDescriptorCopies=*/nullptr); + + dirty_ = false; + + return {RhiResult::success, set_}; } -void VulkanResourceBinder::lock_layout() { - layout_locked_ = true; +RasterResources &VulkanRasterResources::vertex_buffer(DevicePtr ptr, + uint32_t binding) { + vkapi::IVkBuffer buffer = + (ptr != kDeviceNullPtr) ? device_->get_vkbuffer(ptr) : nullptr; + if (buffer == nullptr) { + vertex_buffers.erase(binding); + } else { + vertex_buffers[binding] = {buffer, ptr.offset}; + } + return *this; +} + +RasterResources &VulkanRasterResources::index_buffer(DevicePtr ptr, + size_t index_width) { + vkapi::IVkBuffer buffer = + (ptr != kDeviceNullPtr) ? device_->get_vkbuffer(ptr) : nullptr; + if (buffer == nullptr) { + index_binding = BufferBinding(); + index_type = VK_INDEX_TYPE_MAX_ENUM; + } else { + index_binding = {buffer, ptr.offset}; + if (index_width == 32) { + index_type = VK_INDEX_TYPE_UINT32; + } else if (index_width == 16) { + index_type = VK_INDEX_TYPE_UINT16; + } + } + return *this; } VulkanCommandList::VulkanCommandList(VulkanDevice *ti_device, @@ -866,7 +847,7 @@ void VulkanCommandList::bind_pipeline(Pipeline *p) { vkCmdBindPipeline(buffer_->buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline->pipeline); - VkViewport viewport; + VkViewport viewport{}; viewport.width = viewport_width_; viewport.height = viewport_height_; viewport.x = 0; @@ -874,9 +855,8 @@ void VulkanCommandList::bind_pipeline(Pipeline *p) { viewport.minDepth = 0.0; viewport.maxDepth = 1.0; - VkRect2D scissor; - scissor.offset = {0, 0}; - scissor.extent = {viewport_width_, viewport_height_}; + VkRect2D scissor{/*offset*/ {0, 0}, + /*extent*/ {viewport_width_, viewport_height_}}; vkCmdSetViewport(buffer_->buffer, 0, 1, &viewport); vkCmdSetScissor(buffer_->buffer, 0, 1, &scissor); @@ -892,59 +872,88 @@ void VulkanCommandList::bind_pipeline(Pipeline *p) { current_pipeline_ = pipeline; } -void VulkanCommandList::bind_resources(ResourceBinder *ti_binder) { - VulkanResourceBinder *binder = static_cast(ti_binder); +RhiResult VulkanCommandList::bind_shader_resources(ShaderResourceSet *res, + int set_index) { + VulkanResourceSet *set = static_cast(res); + if (set->get_bindings().size() <= 0) { + return RhiResult::success; + } + + auto [status, vk_set] = set->finalize(); + if (status != RhiResult::success) { + return status; + } - for (auto &pair : binder->get_sets()) { - VkPipelineLayout pipeline_layout = - current_pipeline_->pipeline_layout()->layout; + vkapi::IVkDescriptorSetLayout set_layout = set->get_layout(); - vkapi::IVkDescriptorSetLayout layout = - ti_device_->get_desc_set_layout(pair.second); + if (current_pipeline_->pipeline_layout()->ref_desc_layouts[set_index] != + set_layout) { + // WARN: we have a layout mismatch + RHI_LOG_ERROR("Layout mismatch"); - vkapi::IVkDescriptorSet set = nullptr; + auto &templates = current_pipeline_->get_resource_set_templates(); + VulkanResourceSet &set_template = templates.at(set_index); - if (currently_used_sets_.find(pair.second) != currently_used_sets_.end()) { - set = currently_used_sets_.at(pair.second); + for (const auto &template_binding : set_template.get_bindings()) { + char msg[512]; + snprintf(msg, 512, "Template binding %d: (VkDescriptorType) %d", + template_binding.first, template_binding.second.type); + RHI_LOG_ERROR(msg); } - if (!set) { - set = ti_device_->alloc_desc_set(layout); - binder->write_to_set(pair.first, *ti_device_, set); - currently_used_sets_[pair.second] = set; + for (const auto &binding : set->get_bindings()) { + char msg[512]; + snprintf(msg, 512, "Binding %d: (VkDescriptorType) %d", binding.first, + binding.second.type); + RHI_LOG_ERROR(msg); } - VkPipelineBindPoint bind_point; - if (current_pipeline_->is_graphics()) { - bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; - } else { - bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; - } + return RhiResult::invalid_usage; + } - vkCmdBindDescriptorSets(buffer_->buffer, bind_point, pipeline_layout, - /*firstSet=*/0, - /*descriptorSetCount=*/1, &set->set, - /*dynamicOffsetCount=*/0, - /*pDynamicOffsets=*/nullptr); - buffer_->refs.push_back(set); - } - - if (current_pipeline_->is_graphics()) { - auto [idx_ptr, type] = binder->get_index_buffer(); - if (idx_ptr.device) { - auto index_buffer = ti_device_->get_vkbuffer(idx_ptr); - vkCmdBindIndexBuffer(buffer_->buffer, index_buffer->buffer, - idx_ptr.offset, type); - buffer_->refs.push_back(index_buffer); - } + VkPipelineLayout pipeline_layout = + current_pipeline_->pipeline_layout()->layout; + VkPipelineBindPoint bind_point = current_pipeline_->is_graphics() + ? VK_PIPELINE_BIND_POINT_GRAPHICS + : VK_PIPELINE_BIND_POINT_COMPUTE; - for (auto [binding, ptr] : binder->get_vertex_buffers()) { - auto buffer = ti_device_->get_vkbuffer(ptr); - vkCmdBindVertexBuffers(buffer_->buffer, binding, 1, &buffer->buffer, - &ptr.offset); - buffer_->refs.push_back(buffer); - } + vkCmdBindDescriptorSets(buffer_->buffer, bind_point, pipeline_layout, + /*firstSet=*/set_index, + /*descriptorSetCount=*/1, &vk_set->set, + /*dynamicOffsetCount=*/0, + /*pDynamicOffsets=*/nullptr); + buffer_->refs.push_back(vk_set); + + return RhiResult::success; +} + +RhiResult VulkanCommandList::bind_raster_resources(RasterResources *_res) { + VulkanRasterResources *res = static_cast(_res); + + if (!current_pipeline_->is_graphics()) { + return RhiResult::invalid_usage; } + + if (res->index_type >= VK_INDEX_TYPE_MAX_ENUM) { + return RhiResult::not_supported; + } + + if (res->index_binding.buffer != nullptr) { + // We have a valid index buffer + vkapi::IVkBuffer index_buffer = res->index_binding.buffer; + vkCmdBindIndexBuffer(buffer_->buffer, index_buffer->buffer, + res->index_binding.offset, res->index_type); + buffer_->refs.push_back(index_buffer); + } + + for (auto &[binding, buffer] : res->vertex_buffers) { + VkDeviceSize offset_vk = buffer.offset; + vkCmdBindVertexBuffers(buffer_->buffer, binding, 1, &buffer.buffer->buffer, + &offset_vk); + buffer_->refs.push_back(buffer.buffer); + } + + return RhiResult::success; } void VulkanCommandList::buffer_barrier(DevicePtr ptr, size_t size) { @@ -952,7 +961,7 @@ void VulkanCommandList::buffer_barrier(DevicePtr ptr, size_t size) { auto buffer = ti_device_->get_vkbuffer(ptr); - VkBufferMemoryBarrier barrier; + VkBufferMemoryBarrier barrier{}; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; barrier.pNext = nullptr; barrier.buffer = buffer->buffer; @@ -986,7 +995,7 @@ void VulkanCommandList::buffer_barrier(DeviceAllocation alloc) { } void VulkanCommandList::memory_barrier() { - VkMemoryBarrier barrier; + VkMemoryBarrier barrier{}; barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; barrier.pNext = nullptr; barrier.srcAccessMask = @@ -1055,9 +1064,8 @@ void VulkanCommandList::begin_renderpass(int x0, current_renderpass_desc_.color_attachments.clear(); rp_desc.clear_depth = depth_clear; - VkRect2D render_area; - render_area.offset = {x0, y0}; - render_area.extent = {uint32_t(x1 - x0), uint32_t(y1 - y0)}; + VkRect2D render_area{/*offset*/ {x0, y0}, + /*extent*/ {uint32_t(x1 - x0), uint32_t(y1 - y0)}}; viewport_width_ = render_area.extent.width; viewport_height_ = render_area.extent.height; @@ -1106,7 +1114,7 @@ void VulkanCommandList::begin_renderpass(int x0, render_info.pDepthAttachment = nullptr; render_info.pStencilAttachment = nullptr; - VkRenderingAttachmentInfo depth_attachment_info; + VkRenderingAttachmentInfo depth_attachment_info{}; if (depth_attachment) { auto [image, view, format] = ti_device_->get_vk_image(*depth_attachment); rp_desc.depth_attachment = format; @@ -1427,10 +1435,9 @@ void VulkanCommandList::blit_image(DeviceAllocation dst_img, ImageLayout dst_img_layout, ImageLayout src_img_layout, const ImageCopyParams ¶ms) { - VkOffset3D blit_size; - blit_size.x = params.width; - blit_size.y = params.height; - blit_size.z = params.depth; + VkOffset3D blit_size{/*x*/ int(params.width), + /*y*/ int(params.height), + /*z*/ int(params.depth)}; VkImageBlit blit{}; blit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; blit.srcSubresource.layerCount = 1; @@ -1502,7 +1509,8 @@ void VulkanDevice::init_vulkan_structs(Params ¶ms) { graphics_queue_family_index_ = params.graphics_queue_family_index; create_vma_allocator(); - new_descriptor_pool(); + RHI_ASSERT(new_descriptor_pool() == RhiResult::success && + "Failed to allocate initial descriptor pool"); } VulkanDevice::~VulkanDevice() { @@ -1700,6 +1708,14 @@ void VulkanDevice::dealloc_memory(DeviceAllocation handle) { allocations_.release(&get_alloc_internal(handle)); } +ShaderResourceSet *VulkanDevice::create_resource_set() { + return new VulkanResourceSet(this); +} + +RasterResources *VulkanDevice::create_raster_resources() { + return new VulkanRasterResources(this); +} + uint64_t VulkanDevice::get_memory_physical_pointer(DeviceAllocation handle) { return uint64_t(get_alloc_internal(handle).addr); } @@ -1899,7 +1915,7 @@ std::unique_ptr VulkanDevice::create_raster_pipeline( params.device = this; params.name = name; - for (auto src_desc : src) { + for (auto &src_desc : src) { SpirvCodeView &code = params.code.emplace_back(); code.data = (uint32_t *)src_desc.data; code.size = src_desc.size; @@ -2156,10 +2172,10 @@ vkapi::IVkRenderPass VulkanDevice::get_renderpass( std::vector attachments; std::vector color_attachments; - VkAttachmentReference depth_attachment; + VkAttachmentReference depth_attachment{}; uint32_t i = 0; - for (auto [format, clear] : desc.color_attachments) { + for (auto &[format, clear] : desc.color_attachments) { VkAttachmentDescription &description = attachments.emplace_back(); description.flags = 0; description.format = format; @@ -2231,10 +2247,10 @@ vkapi::IVkRenderPass VulkanDevice::get_renderpass( } vkapi::IVkDescriptorSetLayout VulkanDevice::get_desc_set_layout( - VulkanResourceBinder::Set &set) { + VulkanResourceSet &set) { if (desc_set_layouts_.find(set) == desc_set_layouts_.end()) { std::vector bindings; - for (auto &pair : set.bindings) { + for (const auto &pair : set.get_bindings()) { bindings.push_back(VkDescriptorSetLayoutBinding{ /*binding=*/pair.first, pair.second.type, /*descriptorCount=*/1, VK_SHADER_STAGE_ALL, @@ -2257,20 +2273,22 @@ vkapi::IVkDescriptorSetLayout VulkanDevice::get_desc_set_layout( } } -vkapi::IVkDescriptorSet VulkanDevice::alloc_desc_set( +RhiReturn VulkanDevice::alloc_desc_set( vkapi::IVkDescriptorSetLayout layout) { - // TODO: Currently we assume the calling code has called get_desc_set_layout - // before allocating a desc set. Either we should guard against this or - // maintain this assumption in other parts of the VulkanBackend + // This returns nullptr if can't allocate (OOM or pool is full) vkapi::IVkDescriptorSet set = vkapi::allocate_descriptor_sets(desc_pool_, layout); if (set == nullptr) { - new_descriptor_pool(); + RhiResult status = new_descriptor_pool(); + // Allocating new descriptor pool failed + if (status != RhiResult::success) { + return {status, nullptr}; + } set = vkapi::allocate_descriptor_sets(desc_pool_, layout); } - return set; + return {RhiResult::success, set}; } void VulkanDevice::create_vma_allocator() { @@ -2357,7 +2375,7 @@ void VulkanDevice::create_vma_allocator() { vmaCreateAllocator(&allocatorInfo, &allocator_export_); } -void VulkanDevice::new_descriptor_pool() { +RhiResult VulkanDevice::new_descriptor_pool() { std::vector pool_sizes{ {VK_DESCRIPTOR_TYPE_SAMPLER, 64}, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 256}, @@ -2376,7 +2394,15 @@ void VulkanDevice::new_descriptor_pool() { pool_info.maxSets = 64; pool_info.poolSizeCount = pool_sizes.size(); pool_info.pPoolSizes = pool_sizes.data(); - desc_pool_ = vkapi::create_descriptor_pool(device_, &pool_info); + auto new_desc_pool = vkapi::create_descriptor_pool(device_, &pool_info); + + if (!new_desc_pool) { + return RhiResult::out_of_memory; + } + + desc_pool_ = new_desc_pool; + + return RhiResult::success; } VkPresentModeKHR choose_swap_present_mode( @@ -2540,7 +2566,7 @@ void VulkanSurface::create_swap_chain() { this->width_ = extent.width; this->height_ = extent.height; - VkSwapchainCreateInfoKHR createInfo; + VkSwapchainCreateInfoKHR createInfo{}; createInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; createInfo.pNext = nullptr; createInfo.flags = 0; @@ -2602,7 +2628,7 @@ void VulkanSurface::create_swap_chain() { } void VulkanSurface::destroy_swap_chain() { - for (auto alloc : swapchain_images_) { + for (auto &alloc : swapchain_images_) { std::get<1>(device_->get_vk_image(alloc)) = nullptr; device_->destroy_image(alloc); } @@ -2644,7 +2670,7 @@ std::pair VulkanSurface::get_size() { StreamSemaphore VulkanSurface::acquire_next_image() { if (!config_.window_handle) { - image_index_ = (image_index_ + 1) % swapchain_images_.size(); + image_index_ = (image_index_ + 1) % uint32_t(swapchain_images_.size()); return nullptr; } else { vkAcquireNextImageKHR(device_->vk_device(), swapchain_, UINT64_MAX, @@ -2694,7 +2720,7 @@ DeviceAllocation VulkanSurface::get_depth_data(DeviceAllocation &depth_alloc) { auto *stream = device_->get_graphics_stream(); auto [w, h] = get_size(); - size_t size_bytes = w * h * 4; + size_t size_bytes = size_t(w * h) * sizeof(float); if (depth_buffer_ == kDeviceNullAllocation) { Device::AllocParams params{size_bytes, /*host_wrtie*/ false, @@ -2725,7 +2751,7 @@ DeviceAllocation VulkanSurface::get_image_data() { auto *stream = device_->get_graphics_stream(); DeviceAllocation img_alloc = swapchain_images_[image_index_]; auto [w, h] = get_size(); - size_t size_bytes = w * h * 4; + size_t size_bytes = size_t(w * h) * sizeof(uint8_t) * 4; /* if (screenshot_image_ == kDeviceNullAllocation) { diff --git a/taichi/rhi/vulkan/vulkan_device.h b/taichi/rhi/vulkan/vulkan_device.h index 9818c965a8321..edd110110ca6e 100644 --- a/taichi/rhi/vulkan/vulkan_device.h +++ b/taichi/rhi/vulkan/vulkan_device.h @@ -1,8 +1,11 @@ #pragma once +#include "taichi/rhi/device.h" #include "taichi/rhi/vulkan/vulkan_api.h" +#include "taichi/rhi/vulkan/vulkan_utils.h" +#include "taichi/common/ref_counted_pool.h" -#include +#include "vk_mem_alloc.h" #ifdef ANDROID #include @@ -13,10 +16,7 @@ #include #include #include - -#include -#include -#include +#include namespace taichi::lang { namespace vulkan { @@ -59,7 +59,7 @@ struct RenderPassDescHasher { std::size_t operator()(const VulkanRenderPassDesc &desc) const { // TODO: Come up with a better hash size_t hash = 0; - for (auto pair : desc.color_attachments) { + for (auto &pair : desc.color_attachments) { hash ^= (size_t(pair.first) + pair.second); hash = (hash << 3) || (hash >> 61); } @@ -70,10 +70,10 @@ struct RenderPassDescHasher { }; struct VulkanFramebufferDesc { - std::vector attachments; - uint32_t width; - uint32_t height; - vkapi::IVkRenderPass renderpass; + std::vector attachments{}; + uint32_t width{0}; + uint32_t height{0}; + vkapi::IVkRenderPass renderpass{nullptr}; bool operator==(const VulkanFramebufferDesc &other) const { return width == other.width && height == other.height && @@ -84,7 +84,7 @@ struct VulkanFramebufferDesc { struct FramebufferDescHasher { std::size_t operator()(const VulkanFramebufferDesc &desc) const { size_t hash = 0; - for (auto view : desc.attachments) { + for (auto &view : desc.attachments) { hash ^= size_t(view->view); hash = (hash << 3) || (hash >> 61); } @@ -95,76 +95,105 @@ struct FramebufferDescHasher { } }; -class VulkanResourceBinder : public ResourceBinder { +class VulkanResourceSet : public ShaderResourceSet { public: - struct Binding { - VkDescriptorType type; - DevicePtr ptr; - VkDeviceSize size; - union { - VkSampler sampler{VK_NULL_HANDLE}; // used only for images - int image_lod; - }; + struct Buffer { + vkapi::IVkBuffer buffer{nullptr}; + VkDeviceSize offset{0}; + VkDeviceSize size{0}; - bool operator==(const Binding &other) const { - return other.type == type && other.ptr == ptr && other.size == size && - other.sampler == sampler; + bool operator==(const Buffer &rhs) const { + return buffer == rhs.buffer && offset == rhs.offset && size == rhs.size; } - bool operator!=(const Binding &other) const { - return !(*this == other); + bool operator!=(const Buffer &rhs) const { + return !(*this == rhs); } }; - struct Set { - std::unordered_map bindings; + struct Image { + vkapi::IVkImageView view{nullptr}; - // The compare function is for the hashmap to locate a set layout - bool operator==(const Set &other) const { - if (other.bindings.size() != bindings.size()) { - return false; - } - for (auto &pair : bindings) { - auto other_binding_iter = other.bindings.find(pair.first); - if (other_binding_iter == other.bindings.end()) { - return false; - } - const Binding &other_binding = other_binding_iter->second; - if (other_binding.type != pair.second.type) { - return false; - } - } - return true; + bool operator==(const Image &rhs) const { + return view == rhs.view; + } + + bool operator!=(const Image &rhs) const { + return view != rhs.view; } + }; - bool operator!=(const Set &other) const { - return !(*this == other); + struct Texture { + vkapi::IVkImageView view{nullptr}; + vkapi::IVkSampler sampler{nullptr}; + + bool operator==(const Texture &rhs) const { + return view == rhs.view && sampler == rhs.sampler; + } + + bool operator!=(const Texture &rhs) const { + return !(*this == rhs); + } + }; + + struct Binding { + VkDescriptorType type{VK_DESCRIPTOR_TYPE_MAX_ENUM}; + std::variant res{Buffer()}; + + bool operator==(const Binding &other) const { + return other.type == type && other.res == res; + } + + bool operator!=(const Binding &other) const { + return other.type != type || other.res != res; + } + + size_t hash() const { + size_t hash = 0; + rhi_impl::hash_combine(hash, int(type)); + if (const Buffer *buf = std::get_if(&res)) { + rhi_impl::hash_combine(hash, (void *)buf->buffer.get()); + rhi_impl::hash_combine(hash, size_t(buf->offset)); + rhi_impl::hash_combine(hash, size_t(buf->size)); + } else if (const Image *img = std::get_if(&res)) { + rhi_impl::hash_combine(hash, (void *)img->view.get()); + } else if (const Texture *tex = std::get_if(&res)) { + rhi_impl::hash_combine(hash, (void *)tex->view.get()); + rhi_impl::hash_combine(hash, (void *)tex->sampler.get()); + } + return hash; } }; + // This hashes the Set Layout struct SetLayoutHasher { - std::size_t operator()(const Set &set) const { - // TODO: Come up with a better hash + std::size_t operator()(const VulkanResourceSet &set) const { + // NOTE: Bindings in this case is ordered, we can use non-commutative + // operations size_t hash = 0; - for (const auto &pair : set.bindings) { - hash = (hash ^ size_t(pair.second.type)) ^ size_t(pair.first); + for (const auto &pair : set.bindings_) { + rhi_impl::hash_combine(hash, pair.first); + // We only care about type in this case + rhi_impl::hash_combine(hash, pair.second.type); } return hash; } }; - struct DescSetCmp { - bool operator()(const Set &a, const Set &b) const { - if (a.bindings.size() != b.bindings.size()) { + // This compares the layout of two sets + struct SetLayoutCmp { + bool operator()(const VulkanResourceSet &lhs, + const VulkanResourceSet &rhs) const { + if (lhs.bindings_.size() != rhs.bindings_.size()) { return false; } - for (auto &pair : a.bindings) { - auto other_binding_iter = b.bindings.find(pair.first); - if (other_binding_iter == b.bindings.end()) { + for (auto &lhs_pair : lhs.bindings_) { + auto rhs_binding_iter = rhs.bindings_.find(lhs_pair.first); + if (rhs_binding_iter == rhs.bindings_.end()) { return false; } - const Binding &other_binding = other_binding_iter->second; - if (other_binding != pair.second) { + const Binding &rhs_binding = rhs_binding_iter->second; + if (rhs_binding.type != lhs_pair.second.type) { return false; } } @@ -172,83 +201,84 @@ class VulkanResourceBinder : public ResourceBinder { } }; + // This hashes the entire set (including resources) struct DescSetHasher { - std::size_t operator()(const Set &set) const { - // TODO: Come up with a better hash + std::size_t operator()(const VulkanResourceSet &set) const { size_t hash = 0; - for (const auto &pair : set.bindings) { - size_t binding_hash = 0; - uint32_t *u32_ptr = (uint32_t *)&pair.second; - static_assert( - sizeof(VulkanResourceBinder::Binding) % sizeof(uint32_t) == 0, - "sizeof(VulkanResourceBinder::Binding) is not a multiple of 4"); - size_t n = sizeof(VulkanResourceBinder::Binding) / sizeof(uint32_t); - for (size_t i = 0; i < n; i++) { - binding_hash = binding_hash ^ u32_ptr[i]; - binding_hash = (binding_hash << 7) | (binding_hash >> (64 - 7)); - } - binding_hash = binding_hash ^ pair.first; - binding_hash = - (binding_hash << pair.first) | (binding_hash >> (64 - pair.first)); - hash = hash ^ binding_hash; + for (const auto &pair : set.bindings_) { + rhi_impl::hash_combine(hash, pair.first); + hash ^= pair.second.hash() + 0x9e3779b9 + (hash << 6) + (hash >> 2); } return hash; } }; - explicit VulkanResourceBinder( - VkPipelineBindPoint bind_point = VK_PIPELINE_BIND_POINT_COMPUTE); - ~VulkanResourceBinder() override; - - void rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override; - void rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) override; - void buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override; - void buffer(uint32_t set, uint32_t binding, DeviceAllocation alloc) override; - void image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - ImageSamplerConfig sampler_config) override; - void rw_image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - int lod) override; - void vertex_buffer(DevicePtr ptr, uint32_t binding = 0) override; - void index_buffer(DevicePtr ptr, size_t index_width) override; - - void write_to_set(uint32_t index, - VulkanDevice &device, - vkapi::IVkDescriptorSet set); - Set &get_set(uint32_t index) { - return sets_[index]; - } - std::unordered_map &get_sets() { - return sets_; + // This compares two sets (including resources) + struct SetCmp { + bool operator()(const VulkanResourceSet &lhs, + const VulkanResourceSet &rhs) const { + return lhs.bindings_ == rhs.bindings_; + } + }; + + explicit VulkanResourceSet(VulkanDevice *device); + VulkanResourceSet(const VulkanResourceSet &other) = default; + ~VulkanResourceSet() override; + + ShaderResourceSet &rw_buffer(uint32_t binding, + DevicePtr ptr, + size_t size) final; + ShaderResourceSet &rw_buffer(uint32_t binding, DeviceAllocation alloc) final; + ShaderResourceSet &buffer(uint32_t binding, DevicePtr ptr, size_t size) final; + ShaderResourceSet &buffer(uint32_t binding, DeviceAllocation alloc) final; + ShaderResourceSet &image(uint32_t binding, + DeviceAllocation alloc, + ImageSamplerConfig sampler_config) final; + ShaderResourceSet &rw_image(uint32_t binding, + DeviceAllocation alloc, + int lod) final; + + rhi_impl::RhiReturn finalize(); + + vkapi::IVkDescriptorSetLayout get_layout() { + return layout_; } - std::unordered_map &get_vertex_buffers() { - return vertex_buffers_; + + const std::map &get_bindings() const { + return bindings_; } - std::pair get_index_buffer() { - return std::make_pair(index_buffer_, index_type_); + + private: + std::map bindings_; + VulkanDevice *device_; + + vkapi::IVkDescriptorSetLayout layout_{nullptr}; + vkapi::IVkDescriptorSet set_{nullptr}; + + bool dirty_{true}; +}; + +class VulkanRasterResources : public RasterResources { + public: + VulkanRasterResources(VulkanDevice *device) : device_(device) { } - void lock_layout(); + struct BufferBinding { + vkapi::IVkBuffer buffer{nullptr}; + size_t offset{0}; + }; - private: - std::unordered_map sets_; - bool layout_locked_{false}; - VkPipelineBindPoint bind_point_; + std::unordered_map vertex_buffers; + BufferBinding index_binding; + VkIndexType index_type{VK_INDEX_TYPE_MAX_ENUM}; + + ~VulkanRasterResources() override = default; + + RasterResources &vertex_buffer(DevicePtr ptr, uint32_t binding = 0) final; + RasterResources &index_buffer(DevicePtr ptr, size_t index_width) final; - std::unordered_map vertex_buffers_; - DevicePtr index_buffer_{kDeviceNullPtr}; - VkIndexType index_type_; + private: + VulkanDevice *device_; }; // VulkanPipeline maps to a vkapi::IVkPipeline, or a SPIR-V module (a GLSL @@ -269,10 +299,6 @@ class VulkanPipeline : public Pipeline { const std::vector &vertex_attrs); ~VulkanPipeline() override; - ResourceBinder *resource_binder() override { - return &resource_binder_; - } - vkapi::IVkPipelineLayout pipeline_layout() const { return pipeline_layout_; } @@ -296,6 +322,11 @@ class VulkanPipeline : public Pipeline { return graphics_pipeline_template_ != nullptr; } + std::unordered_map + &get_resource_set_templates() { + return set_templates_; + } + private: void create_descriptor_set_layout(const Params ¶ms); void create_shader_stages(const Params ¶ms); @@ -328,6 +359,7 @@ class VulkanPipeline : public Pipeline { VkGraphicsPipelineCreateInfo pipeline_info{}; }; + VulkanDevice &ti_device_; // not owned VkDevice device_{VK_NULL_HANDLE}; // not owned std::string name_; @@ -344,7 +376,7 @@ class VulkanPipeline : public Pipeline { RenderPassDescHasher> graphics_pipeline_dynamic_; - VulkanResourceBinder resource_binder_; + std::unordered_map set_templates_; std::vector set_layouts_; std::vector shader_modules_; vkapi::IVkPipeline pipeline_{VK_NULL_HANDLE}; @@ -359,7 +391,9 @@ class VulkanCommandList : public CommandList { ~VulkanCommandList() override; void bind_pipeline(Pipeline *p) override; - void bind_resources(ResourceBinder *binder) override; + RhiResult bind_shader_resources(ShaderResourceSet *res, + int set_index = 0) final; + RhiResult bind_raster_resources(RasterResources *res) final; void buffer_barrier(DevicePtr ptr, size_t size) override; void buffer_barrier(DeviceAllocation alloc) override; void memory_barrier() override; @@ -432,12 +466,6 @@ class VulkanCommandList : public CommandList { vkapi::IVkCommandBuffer buffer_; VulkanPipeline *current_pipeline_{nullptr}; - std::unordered_map - currently_used_sets_; - // Renderpass & raster pipeline std::vector current_dynamic_targets_; VulkanRenderPassDesc current_renderpass_desc_; @@ -470,16 +498,16 @@ class VulkanSurface : public Surface { SurfaceConfig config_; - VulkanDevice *device_; - VkSurfaceKHR surface_; - VkSwapchainKHR swapchain_; - vkapi::IVkSemaphore image_available_; + VulkanDevice *device_{nullptr}; + VkSurfaceKHR surface_{VK_NULL_HANDLE}; + VkSwapchainKHR swapchain_{VK_NULL_HANDLE}; + vkapi::IVkSemaphore image_available_{nullptr}; #ifdef ANDROID - ANativeWindow *window_; + ANativeWindow *window_{nullptr}; #else - GLFWwindow *window_; + GLFWwindow *window_{nullptr}; #endif - BufferFormat image_format_; + BufferFormat image_format_{BufferFormat::unknown}; uint32_t image_index_{0}; @@ -563,13 +591,13 @@ class TI_DLL_EXPORT VulkanDevice : public GraphicsDevice { public: struct Params { PFN_vkGetInstanceProcAddr get_proc_addr{nullptr}; - VkInstance instance; - VkPhysicalDevice physical_device; - VkDevice device; - VkQueue compute_queue; - uint32_t compute_queue_family_index; - VkQueue graphics_queue; - uint32_t graphics_queue_family_index; + VkInstance instance{VK_NULL_HANDLE}; + VkPhysicalDevice physical_device{VK_NULL_HANDLE}; + VkDevice device{VK_NULL_HANDLE}; + VkQueue compute_queue{VK_NULL_HANDLE}; + uint32_t compute_queue_family_index{0}; + VkQueue graphics_queue{VK_NULL_HANDLE}; + uint32_t graphics_queue_family_index{0}; }; VulkanDevice(); @@ -589,6 +617,10 @@ class TI_DLL_EXPORT VulkanDevice : public GraphicsDevice { uint64_t get_memory_physical_pointer(DeviceAllocation handle) override; + ShaderResourceSet *create_resource_set() final; + + RasterResources *create_raster_resources() final; + RhiResult map_range(DevicePtr ptr, uint64_t size, void **mapped_ptr) final; RhiResult map(DeviceAllocation alloc, void **mapped_ptr) final; @@ -670,9 +702,9 @@ class TI_DLL_EXPORT VulkanDevice : public GraphicsDevice { vkapi::IVkFramebuffer get_framebuffer(const VulkanFramebufferDesc &desc); - vkapi::IVkDescriptorSetLayout get_desc_set_layout( - VulkanResourceBinder::Set &set); - vkapi::IVkDescriptorSet alloc_desc_set(vkapi::IVkDescriptorSetLayout layout); + vkapi::IVkDescriptorSetLayout get_desc_set_layout(VulkanResourceSet &set); + rhi_impl::RhiReturn alloc_desc_set( + vkapi::IVkDescriptorSetLayout layout); constexpr VulkanCapabilities &vk_caps() { return vk_caps_; @@ -685,21 +717,21 @@ class TI_DLL_EXPORT VulkanDevice : public GraphicsDevice { friend VulkanSurface; void create_vma_allocator(); - void new_descriptor_pool(); + [[nodiscard]] RhiResult new_descriptor_pool(); VulkanCapabilities vk_caps_; - VkInstance instance_; - VkDevice device_; - VkPhysicalDevice physical_device_; - VmaAllocator allocator_; + VkInstance instance_{VK_NULL_HANDLE}; + VkDevice device_{VK_NULL_HANDLE}; + VkPhysicalDevice physical_device_{VK_NULL_HANDLE}; + VmaAllocator allocator_{nullptr}; VmaAllocator allocator_export_{nullptr}; - VkQueue compute_queue_; - uint32_t compute_queue_family_index_; + VkQueue compute_queue_{VK_NULL_HANDLE}; + uint32_t compute_queue_family_index_{0}; - VkQueue graphics_queue_; - uint32_t graphics_queue_family_index_; + VkQueue graphics_queue_{VK_NULL_HANDLE}; + uint32_t graphics_queue_family_index_{0}; struct ThreadLocalStreams; std::unique_ptr compute_streams_{nullptr}; @@ -722,10 +754,10 @@ class TI_DLL_EXPORT VulkanDevice : public GraphicsDevice { // Images / Image views struct ImageAllocInternal { bool external{false}; - VmaAllocationInfo alloc_info; + VmaAllocationInfo alloc_info{}; vkapi::IVkImage image{nullptr}; vkapi::IVkImageView view{nullptr}; - std::vector view_lods; + std::vector view_lods{}; }; // Since we use the pointer to AllocationInternal as the `alloc_id`, @@ -744,9 +776,10 @@ class TI_DLL_EXPORT VulkanDevice : public GraphicsDevice { framebuffer_pools_; // Descriptors / Layouts / Pools - unordered_map + VulkanResourceSet::SetLayoutHasher, + VulkanResourceSet::SetLayoutCmp> desc_set_layouts_; vkapi::IVkDescriptorPool desc_pool_{nullptr}; diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index 1d56d557883c8..eb6e9395d30d3 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -332,10 +332,10 @@ void VulkanDeviceCreator::create_instance(uint32_t vk_api_version, } std::unordered_set extensions; - for (auto ext : get_required_extensions(params_.enable_validation_layer)) { + for (auto &ext : get_required_extensions(params_.enable_validation_layer)) { extensions.insert(std::string(ext)); } - for (auto ext : params_.additional_instance_extensions) { + for (auto &ext : params_.additional_instance_extensions) { extensions.insert(std::string(ext)); } diff --git a/taichi/runtime/cuda/jit_cuda.cpp b/taichi/runtime/cuda/jit_cuda.cpp index 98c5538875828..898df3d182d7c 100644 --- a/taichi/runtime/cuda/jit_cuda.cpp +++ b/taichi/runtime/cuda/jit_cuda.cpp @@ -165,7 +165,14 @@ std::string JITSessionCUDA::compile_module_to_ptx( if (kFTZDenorms) { for (llvm::Function &fn : *module) { - fn.addFnAttr("nvptx-f32ftz", "true"); + /* nvptx-f32ftz was deprecated. + * + * https://github.com/llvm/llvm-project/commit/a4451d88ee456304c26d552749aea6a7f5154bde#diff-6fda74ef428299644e9f49a2b0994c0d850a760b89828f655030a114060d075a + */ + fn.addFnAttr("denormal-fp-math-f32", "preserve-sign"); + + // Use unsafe fp math for sqrt.approx instead of sqrt.rn + fn.addFnAttr("unsafe-fp-math", "true"); } } diff --git a/taichi/runtime/gfx/runtime.cpp b/taichi/runtime/gfx/runtime.cpp index 525c8ac223d5c..401332be9c233 100644 --- a/taichi/runtime/gfx/runtime.cpp +++ b/taichi/runtime/gfx/runtime.cpp @@ -464,19 +464,23 @@ void GfxRuntime::launch_kernel(KernelHandle handle, RuntimeContext *host_ctx) { const int group_x = (attribs.advisory_total_num_threads + attribs.advisory_num_threads_per_group - 1) / attribs.advisory_num_threads_per_group; - ResourceBinder *binder = vp->resource_binder(); + std::unique_ptr bindings = + device_->create_resource_set_unique(); for (auto &bind : attribs.buffer_binds) { + // We might have to bind a invalid buffer (this is fine as long as + // shader don't do anything with it) if (bind.buffer.type == BufferType::ExtArr) { - binder->rw_buffer(0, bind.binding, any_arrays.at(bind.buffer.root_id)); - } else if (args_buffer && bind.buffer.type == BufferType::Args) { - binder->buffer(0, bind.binding, *args_buffer); - } else if (ret_buffer && bind.buffer.type == BufferType::Rets) { - binder->rw_buffer(0, bind.binding, *ret_buffer); + bindings->rw_buffer(bind.binding, any_arrays.at(bind.buffer.root_id)); + } else if (bind.buffer.type == BufferType::Args) { + bindings->buffer(bind.binding, + args_buffer ? *args_buffer : kDeviceNullAllocation); + } else if (bind.buffer.type == BufferType::Rets) { + bindings->rw_buffer(bind.binding, + ret_buffer ? *ret_buffer : kDeviceNullAllocation); } else { DeviceAllocation *alloc = ti_kernel->get_buffer_bind(bind.buffer); - if (alloc) { - binder->rw_buffer(0, bind.binding, *alloc); - } + bindings->rw_buffer(bind.binding, + alloc ? *alloc : kDeviceNullAllocation); } } @@ -484,10 +488,10 @@ void GfxRuntime::launch_kernel(KernelHandle handle, RuntimeContext *host_ctx) { DeviceAllocation texture = textures.at(bind.arg_id); if (bind.is_storage) { transition_image(texture, ImageLayout::shader_read_write); - binder->rw_image(0, bind.binding, texture, 0); + bindings->rw_image(bind.binding, texture, 0); } else { transition_image(texture, ImageLayout::shader_read); - binder->image(0, bind.binding, texture, {}); + bindings->image(bind.binding, texture, {}); } } @@ -506,7 +510,9 @@ void GfxRuntime::launch_kernel(KernelHandle handle, RuntimeContext *host_ctx) { } current_cmdlist_->bind_pipeline(vp); - current_cmdlist_->bind_resources(binder); + RhiResult status = current_cmdlist_->bind_shader_resources(bindings.get()); + TI_ERROR_IF(status != RhiResult::success, + "Resource binding error : RhiResult({})", status); current_cmdlist_->dispatch(group_x); current_cmdlist_->memory_barrier(); } diff --git a/taichi/runtime/llvm/CMakeLists.txt b/taichi/runtime/llvm/CMakeLists.txt index bd8448d63f86a..2c39493415ea2 100644 --- a/taichi/runtime/llvm/CMakeLists.txt +++ b/taichi/runtime/llvm/CMakeLists.txt @@ -33,6 +33,11 @@ if (TI_WITH_CUDA) target_link_libraries(llvm_runtime PRIVATE cuda_rhi) endif() +if (TI_WITH_AMDGPU) + target_link_libraries(llvm_runtime PRIVATE ${llvm_ptx_libs}) + target_link_libraries(llvm_runtime PRIVATE amdgpu_rhi) +endif() + if (TI_WITH_DX12) target_link_libraries(llvm_runtime PRIVATE ${llvm_directx_libs}) target_link_libraries(llvm_runtime PRIVATE dx12_rhi) diff --git a/taichi/runtime/llvm/llvm_runtime_executor.cpp b/taichi/runtime/llvm/llvm_runtime_executor.cpp index 15968f299f626..b137aba54f632 100644 --- a/taichi/runtime/llvm/llvm_runtime_executor.cpp +++ b/taichi/runtime/llvm/llvm_runtime_executor.cpp @@ -6,11 +6,17 @@ #include "taichi/rhi/cuda/cuda_device.h" #include "taichi/platform/cuda/detect_cuda.h" #include "taichi/rhi/cuda/cuda_driver.h" +#include "taichi/rhi/amdgpu/amdgpu_driver.h" +#include "taichi/rhi/amdgpu/amdgpu_device.h" #if defined(TI_WITH_CUDA) #include "taichi/rhi/cuda/cuda_context.h" #endif +#if defined(TI_WITH_AMDGPU) +#include "taichi/rhi/amdgpu/amdgpu_context.h" +#endif + namespace taichi::lang { namespace { void assert_failed_host(const char *msg) { @@ -118,7 +124,7 @@ LlvmRuntimeExecutor::LlvmRuntimeExecutor(CompileConfig &config, #if defined(TI_WITH_AMDGPU) if (config.arch == Arch::amdgpu) { AMDGPUContext::get_instance().set_debug(config.debug); - device_ = std::make_shared(); + device_ = std::make_shared(); this->maybe_initialize_amdgpu_llvm_context(); } diff --git a/taichi/transforms/eliminate_immutable_local_vars.cpp b/taichi/transforms/eliminate_immutable_local_vars.cpp index 036e96459f574..6fb1533823e5a 100644 --- a/taichi/transforms/eliminate_immutable_local_vars.cpp +++ b/taichi/transforms/eliminate_immutable_local_vars.cpp @@ -15,26 +15,29 @@ class EliminateImmutableLocalVars : public BasicStmtVisitor { private: using BasicStmtVisitor::visit; - DelayedIRModifier modifier_; std::unordered_set immutable_local_vars_; std::unordered_map immutable_local_var_to_value_; + ImmediateIRModifier immediate_modifier_; + DelayedIRModifier delayed_modifier_; public: explicit EliminateImmutableLocalVars( - const std::unordered_set &immutable_local_vars) - : immutable_local_vars_(immutable_local_vars) { + const std::unordered_set &immutable_local_vars, + IRNode *node) + : immutable_local_vars_(immutable_local_vars), immediate_modifier_(node) { } void visit(AllocaStmt *stmt) override { if (immutable_local_vars_.find(stmt) != immutable_local_vars_.end()) { - modifier_.erase(stmt); + delayed_modifier_.erase(stmt); } } void visit(LocalLoadStmt *stmt) override { if (immutable_local_vars_.find(stmt->src) != immutable_local_vars_.end()) { - stmt->replace_usages_with(immutable_local_var_to_value_[stmt->src]); - modifier_.erase(stmt); + immediate_modifier_.replace_usages_with( + stmt, immutable_local_var_to_value_[stmt->src]); + delayed_modifier_.erase(stmt); } } @@ -43,15 +46,15 @@ class EliminateImmutableLocalVars : public BasicStmtVisitor { TI_ASSERT(immutable_local_var_to_value_.find(stmt->dest) == immutable_local_var_to_value_.end()); immutable_local_var_to_value_[stmt->dest] = stmt->val; - modifier_.erase(stmt); + delayed_modifier_.erase(stmt); } } static void run(IRNode *node) { EliminateImmutableLocalVars pass( - irpass::analysis::gather_immutable_local_vars(node)); + irpass::analysis::gather_immutable_local_vars(node), node); node->accept(&pass); - pass.modifier_.modify_ir(); + pass.delayed_modifier_.modify_ir(); } }; diff --git a/taichi/transforms/scalarize.cpp b/taichi/transforms/scalarize.cpp index bf55ed1538d5c..2c2ecfddee2d9 100644 --- a/taichi/transforms/scalarize.cpp +++ b/taichi/transforms/scalarize.cpp @@ -20,12 +20,13 @@ static bool is_alloca_scalarizable(AllocaStmt *stmt) { class Scalarize : public BasicStmtVisitor { public: - DelayedIRModifier modifier_; + ImmediateIRModifier immediate_modifier_; + DelayedIRModifier delayed_modifier_; - explicit Scalarize(IRNode *node) { + explicit Scalarize(IRNode *node) : immediate_modifier_(node) { node->accept(this); - modifier_.modify_ir(); + delayed_modifier_.modify_ir(); } /* @@ -75,12 +76,12 @@ class Scalarize : public BasicStmtVisitor { auto scalarized_stmt = std::make_unique(matrix_ptr_stmt.get(), matrix_init_stmt->values[i]); - modifier_.insert_before(stmt, std::move(const_stmt)); - modifier_.insert_before(stmt, std::move(matrix_ptr_stmt)); - modifier_.insert_before(stmt, std::move(scalarized_stmt)); + delayed_modifier_.insert_before(stmt, std::move(const_stmt)); + delayed_modifier_.insert_before(stmt, std::move(matrix_ptr_stmt)); + delayed_modifier_.insert_before(stmt, std::move(scalarized_stmt)); } - modifier_.erase(stmt); + delayed_modifier_.erase(stmt); } } @@ -127,19 +128,19 @@ class Scalarize : public BasicStmtVisitor { matrix_init_values.push_back(scalarized_stmt.get()); - modifier_.insert_before(stmt, std::move(const_stmt)); - modifier_.insert_before(stmt, std::move(matrix_ptr_stmt)); - modifier_.insert_before(stmt, std::move(scalarized_stmt)); + delayed_modifier_.insert_before(stmt, std::move(const_stmt)); + delayed_modifier_.insert_before(stmt, std::move(matrix_ptr_stmt)); + delayed_modifier_.insert_before(stmt, std::move(scalarized_stmt)); } auto matrix_init_stmt = std::make_unique(matrix_init_values); matrix_init_stmt->ret_type = src_dtype; - stmt->replace_usages_with(matrix_init_stmt.get()); - modifier_.insert_before(stmt, std::move(matrix_init_stmt)); + immediate_modifier_.replace_usages_with(stmt, matrix_init_stmt.get()); + delayed_modifier_.insert_before(stmt, std::move(matrix_init_stmt)); - modifier_.erase(stmt); + delayed_modifier_.erase(stmt); } } @@ -186,17 +187,17 @@ class Scalarize : public BasicStmtVisitor { unary_stmt->ret_type = primitive_type; matrix_init_values.push_back(unary_stmt.get()); - modifier_.insert_before(stmt, std::move(unary_stmt)); + delayed_modifier_.insert_before(stmt, std::move(unary_stmt)); } auto matrix_init_stmt = std::make_unique(matrix_init_values); matrix_init_stmt->ret_type = operand_dtype; - stmt->replace_usages_with(matrix_init_stmt.get()); - modifier_.insert_before(stmt, std::move(matrix_init_stmt)); + immediate_modifier_.replace_usages_with(stmt, matrix_init_stmt.get()); + delayed_modifier_.insert_before(stmt, std::move(matrix_init_stmt)); - modifier_.erase(stmt); + delayed_modifier_.erase(stmt); } } @@ -256,17 +257,17 @@ class Scalarize : public BasicStmtVisitor { matrix_init_values.push_back(binary_stmt.get()); binary_stmt->ret_type = primitive_type; - modifier_.insert_before(stmt, std::move(binary_stmt)); + delayed_modifier_.insert_before(stmt, std::move(binary_stmt)); } auto matrix_init_stmt = std::make_unique(matrix_init_values); matrix_init_stmt->ret_type = stmt->ret_type; - stmt->replace_usages_with(matrix_init_stmt.get()); - modifier_.insert_before(stmt, std::move(matrix_init_stmt)); + immediate_modifier_.replace_usages_with(stmt, matrix_init_stmt.get()); + delayed_modifier_.insert_before(stmt, std::move(matrix_init_stmt)); - modifier_.erase(stmt); + delayed_modifier_.erase(stmt); } } @@ -334,8 +335,9 @@ class Scalarize : public BasicStmtVisitor { if (!merged_string.empty()) merged_contents.push_back(merged_string); - modifier_.insert_before(stmt, Stmt::make(merged_contents)); - modifier_.erase(stmt); + delayed_modifier_.insert_before(stmt, + Stmt::make(merged_contents)); + delayed_modifier_.erase(stmt); } /* @@ -403,19 +405,19 @@ class Scalarize : public BasicStmtVisitor { matrix_init_values.push_back(atomic_stmt.get()); - modifier_.insert_before(stmt, std::move(const_stmt)); - modifier_.insert_before(stmt, std::move(matrix_ptr_stmt)); - modifier_.insert_before(stmt, std::move(atomic_stmt)); + delayed_modifier_.insert_before(stmt, std::move(const_stmt)); + delayed_modifier_.insert_before(stmt, std::move(matrix_ptr_stmt)); + delayed_modifier_.insert_before(stmt, std::move(atomic_stmt)); } auto matrix_init_stmt = std::make_unique(matrix_init_values); matrix_init_stmt->ret_type = stmt->ret_type; - stmt->replace_usages_with(matrix_init_stmt.get()); - modifier_.insert_before(stmt, std::move(matrix_init_stmt)); + immediate_modifier_.replace_usages_with(stmt, matrix_init_stmt.get()); + delayed_modifier_.insert_before(stmt, std::move(matrix_init_stmt)); - modifier_.erase(stmt); + delayed_modifier_.erase(stmt); } } @@ -487,17 +489,17 @@ class Scalarize : public BasicStmtVisitor { matrix_init_values.push_back(ternary_stmt.get()); ternary_stmt->ret_type = primitive_type; - modifier_.insert_before(stmt, std::move(ternary_stmt)); + delayed_modifier_.insert_before(stmt, std::move(ternary_stmt)); } auto matrix_init_stmt = std::make_unique(matrix_init_values); matrix_init_stmt->ret_type = stmt->ret_type; - stmt->replace_usages_with(matrix_init_stmt.get()); - modifier_.insert_before(stmt, std::move(matrix_init_stmt)); + immediate_modifier_.replace_usages_with(stmt, matrix_init_stmt.get()); + delayed_modifier_.insert_before(stmt, std::move(matrix_init_stmt)); - modifier_.erase(stmt); + delayed_modifier_.erase(stmt); } } @@ -522,10 +524,10 @@ class Scalarize : public BasicStmtVisitor { auto arg_load = std::make_unique(stmt->arg_id, ret_type, stmt->is_ptr); - stmt->replace_usages_with(arg_load.get()); + immediate_modifier_.replace_usages_with(stmt, arg_load.get()); - modifier_.insert_before(stmt, std::move(arg_load)); - modifier_.erase(stmt); + delayed_modifier_.insert_before(stmt, std::move(arg_load)); + delayed_modifier_.erase(stmt); } private: @@ -534,15 +536,16 @@ class Scalarize : public BasicStmtVisitor { class ScalarizePointers : public BasicStmtVisitor { public: - DelayedIRModifier modifier_; + ImmediateIRModifier immediate_modifier_; + DelayedIRModifier delayed_modifier_; // { original_alloca_stmt : [scalarized_alloca_stmt0, ...] } std::unordered_map> scalarized_local_tensor_map_; - explicit ScalarizePointers(IRNode *node) { + explicit ScalarizePointers(IRNode *node) : immediate_modifier_(node) { node->accept(this); - modifier_.modify_ir(); + delayed_modifier_.modify_ir(); } /* @@ -584,10 +587,11 @@ class ScalarizePointers : public BasicStmtVisitor { scalarized_local_tensor_map_[stmt].push_back( scalarized_alloca_stmt.get()); - modifier_.insert_before(stmt, std::move(scalarized_alloca_stmt)); + delayed_modifier_.insert_before(stmt, + std::move(scalarized_alloca_stmt)); } - modifier_.erase(stmt); + delayed_modifier_.erase(stmt); } } @@ -617,7 +621,7 @@ class ScalarizePointers : public BasicStmtVisitor { // handled if (!stmt->offset->is()) { // Removing this line will fail TI_ASSERT in ~DelayedIRModifier() - modifier_.modify_ir(); + delayed_modifier_.modify_ir(); throw TaichiSyntaxError(fmt::format( "{}The index of a Matrix/Vector must be a compile-time constant " "integer.\n" @@ -638,8 +642,8 @@ class ScalarizePointers : public BasicStmtVisitor { TI_ASSERT(offset < scalarized_alloca_stmts.size()); auto alloca_stmt = scalarized_alloca_stmts[offset]; - stmt->replace_usages_with(alloca_stmt); - modifier_.erase(stmt); + immediate_modifier_.replace_usages_with(stmt, alloca_stmt); + delayed_modifier_.erase(stmt); } } } diff --git a/taichi/ui/backends/vulkan/renderable.cpp b/taichi/ui/backends/vulkan/renderable.cpp index b59ce3cd268cf..b8ad7b72c4f27 100644 --- a/taichi/ui/backends/vulkan/renderable.cpp +++ b/taichi/ui/backends/vulkan/renderable.cpp @@ -165,9 +165,15 @@ const Pipeline &Renderable::pipeline() const { } void Renderable::create_bindings() { - ResourceBinder *binder = pipeline_->resource_binder(); - binder->vertex_buffer(vertex_buffer_.get_ptr(0), 0); - binder->index_buffer(index_buffer_.get_ptr(0), 32); + if (!resource_set_) { + resource_set_ = app_context_->device().create_resource_set_unique(); + } + if (!raster_state_) { + raster_state_ = app_context_->device().create_raster_resources_unique(); + } + + raster_state_->vertex_buffer(vertex_buffer_.get_ptr(0), 0); + raster_state_->index_buffer(index_buffer_.get_ptr(0), 32); } void Renderable::create_graphics_pipeline() { @@ -287,13 +293,16 @@ void Renderable::destroy_storage_buffers() { } void Renderable::cleanup() { + resource_set_.reset(); + raster_state_.reset(); free_buffers(); pipeline_.reset(); } void Renderable::record_this_frame_commands(CommandList *command_list) { command_list->bind_pipeline(pipeline_.get()); - command_list->bind_resources(pipeline_->resource_binder()); + command_list->bind_raster_resources(raster_state_.get()); + command_list->bind_shader_resources(resource_set_.get()); if (indexed_) { command_list->draw_indexed(config_.draw_index_count, diff --git a/taichi/ui/backends/vulkan/renderable.h b/taichi/ui/backends/vulkan/renderable.h index 6568212f75177..024a2eadf5de5 100644 --- a/taichi/ui/backends/vulkan/renderable.h +++ b/taichi/ui/backends/vulkan/renderable.h @@ -67,6 +67,8 @@ class Renderable { AppContext *app_context_; std::unique_ptr pipeline_{nullptr}; + std::unique_ptr resource_set_{nullptr}; + std::unique_ptr raster_state_{nullptr}; taichi::lang::DeviceAllocation vertex_buffer_; taichi::lang::DeviceAllocation index_buffer_; diff --git a/taichi/ui/backends/vulkan/renderables/circles.cpp b/taichi/ui/backends/vulkan/renderables/circles.cpp index 187ae58541905..ec42731a9a040 100644 --- a/taichi/ui/backends/vulkan/renderables/circles.cpp +++ b/taichi/ui/backends/vulkan/renderables/circles.cpp @@ -61,8 +61,7 @@ void Circles::update_ubo(glm::vec3 color, void Circles::create_bindings() { Renderable::create_bindings(); - ResourceBinder *binder = pipeline_->resource_binder(); - binder->buffer(0, 0, uniform_buffer_); + resource_set_->buffer(0, uniform_buffer_); } } // namespace vulkan diff --git a/taichi/ui/backends/vulkan/renderables/lines.cpp b/taichi/ui/backends/vulkan/renderables/lines.cpp index 8f426a2275220..2fdbb292640a7 100644 --- a/taichi/ui/backends/vulkan/renderables/lines.cpp +++ b/taichi/ui/backends/vulkan/renderables/lines.cpp @@ -59,13 +59,13 @@ void Lines::update_ubo(glm::vec3 color, bool use_per_vertex_color) { void Lines::create_bindings() { Renderable::create_bindings(); - ResourceBinder *binder = pipeline_->resource_binder(); - binder->buffer(0, 0, uniform_buffer_); + resource_set_->buffer(0, uniform_buffer_); } void Lines::record_this_frame_commands(CommandList *command_list) { command_list->bind_pipeline(pipeline_.get()); - command_list->bind_resources(pipeline_->resource_binder()); + command_list->bind_raster_resources(raster_state_.get()); + command_list->bind_shader_resources(resource_set_.get()); command_list->set_line_width(curr_width_ * app_context_->config.height); if (indexed_) { diff --git a/taichi/ui/backends/vulkan/renderables/mesh.cpp b/taichi/ui/backends/vulkan/renderables/mesh.cpp index bccb5fb14ead1..b139da0f4a46e 100644 --- a/taichi/ui/backends/vulkan/renderables/mesh.cpp +++ b/taichi/ui/backends/vulkan/renderables/mesh.cpp @@ -118,7 +118,8 @@ void Mesh::update_data(const MeshInfo &info, const Scene &scene) { void Mesh::record_this_frame_commands(taichi::lang::CommandList *command_list) { command_list->bind_pipeline(pipeline_.get()); - command_list->bind_resources(pipeline_->resource_binder()); + command_list->bind_raster_resources(raster_state_.get()); + command_list->bind_shader_resources(resource_set_.get()); if (indexed_) { command_list->draw_indexed_instance( @@ -161,10 +162,9 @@ void Mesh::init_mesh(AppContext *app_context, void Mesh::create_bindings() { Renderable::create_bindings(); - ResourceBinder *binder = pipeline_->resource_binder(); - binder->buffer(0, 0, uniform_buffer_); - binder->rw_buffer(0, 1, storage_buffer_); - binder->rw_buffer(0, 2, mesh_storage_buffer_); + resource_set_->buffer(0, uniform_buffer_); + resource_set_->rw_buffer(1, storage_buffer_); + resource_set_->rw_buffer(2, mesh_storage_buffer_); } void Mesh::create_mesh_storage_buffers() { diff --git a/taichi/ui/backends/vulkan/renderables/particles.cpp b/taichi/ui/backends/vulkan/renderables/particles.cpp index 8c8ce70699fe2..8368aef6690bd 100644 --- a/taichi/ui/backends/vulkan/renderables/particles.cpp +++ b/taichi/ui/backends/vulkan/renderables/particles.cpp @@ -80,9 +80,8 @@ void Particles::init_particles(AppContext *app_context, void Particles::create_bindings() { Renderable::create_bindings(); - ResourceBinder *binder = pipeline_->resource_binder(); - binder->buffer(0, 0, uniform_buffer_); - binder->rw_buffer(0, 1, storage_buffer_); + resource_set_->buffer(0, uniform_buffer_); + resource_set_->rw_buffer(1, storage_buffer_); } } // namespace vulkan diff --git a/taichi/ui/backends/vulkan/renderables/scene_lines.cpp b/taichi/ui/backends/vulkan/renderables/scene_lines.cpp index 4b2e12b39bc1b..fa53c981504b2 100644 --- a/taichi/ui/backends/vulkan/renderables/scene_lines.cpp +++ b/taichi/ui/backends/vulkan/renderables/scene_lines.cpp @@ -60,14 +60,14 @@ void SceneLines::update_ubo(const SceneLinesInfo &info, const Scene &scene) { void SceneLines::create_bindings() { Renderable::create_bindings(); - ResourceBinder *binder = pipeline_->resource_binder(); - binder->buffer(0, 0, uniform_buffer_); - binder->rw_buffer(0, 1, storage_buffer_); + resource_set_->buffer(0, uniform_buffer_); + resource_set_->rw_buffer(1, storage_buffer_); } void SceneLines::record_this_frame_commands(CommandList *command_list) { command_list->bind_pipeline(pipeline_.get()); - command_list->bind_resources(pipeline_->resource_binder()); + command_list->bind_raster_resources(raster_state_.get()); + command_list->bind_shader_resources(resource_set_.get()); command_list->set_line_width(curr_width_); if (indexed_) { diff --git a/taichi/ui/backends/vulkan/renderables/set_image.cpp b/taichi/ui/backends/vulkan/renderables/set_image.cpp index 2bbae13b38dc0..80661db26324b 100644 --- a/taichi/ui/backends/vulkan/renderables/set_image.cpp +++ b/taichi/ui/backends/vulkan/renderables/set_image.cpp @@ -321,9 +321,8 @@ void SetImage::update_index_buffer() { void SetImage::create_bindings() { Renderable::create_bindings(); - ResourceBinder *binder = pipeline_->resource_binder(); - binder->image(0, 0, texture_, {}); - binder->buffer(0, 1, uniform_buffer_); + resource_set_->image(0, texture_, {}); + resource_set_->buffer(1, uniform_buffer_); } void SetImage::cleanup() { diff --git a/taichi/ui/backends/vulkan/renderables/triangles.cpp b/taichi/ui/backends/vulkan/renderables/triangles.cpp index 45d9a14d13592..1b6debd557dea 100644 --- a/taichi/ui/backends/vulkan/renderables/triangles.cpp +++ b/taichi/ui/backends/vulkan/renderables/triangles.cpp @@ -55,8 +55,7 @@ void Triangles::update_ubo(glm::vec3 color, bool use_per_vertex_color) { void Triangles::create_bindings() { Renderable::create_bindings(); - ResourceBinder *binder = pipeline_->resource_binder(); - binder->buffer(0, 0, uniform_buffer_); + resource_set_->buffer(0, uniform_buffer_); } } // namespace vulkan diff --git a/tests/python/test_ggui.py b/tests/python/test_ggui.py index 64ed46a3367a0..19ef43e501543 100644 --- a/tests/python/test_ggui.py +++ b/tests/python/test_ggui.py @@ -456,9 +456,11 @@ def render(): render() if (platform.system() == 'Darwin'): # TODO:Fix the bug that mac not support wide lines - verify_image(window.get_image_buffer_as_numpy(), 'test_draw_lines.mac') + verify_image(window.get_image_buffer_as_numpy(), 'test_draw_lines.mac', + 0.2) else: - verify_image(window.get_image_buffer_as_numpy(), 'test_draw_lines') + verify_image(window.get_image_buffer_as_numpy(), 'test_draw_lines', + 0.2) window.destroy()