Skip to content

Commit

Permalink
[vulkan] Reduce runtime host overhead (#4282)
Browse files Browse the repository at this point in the history
* Trying to improve Vulkan JIT runtime performance

* More perf things

* Update taichi/backends/vulkan/runtime.cpp

Co-authored-by: Bo Qiao <[email protected]>

* Remove unused var

Co-authored-by: Bo Qiao <[email protected]>
  • Loading branch information
bobcao3 and qiao-bo authored Feb 16, 2022
1 parent 9683e59 commit 063d630
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 8 deletions.
15 changes: 15 additions & 0 deletions taichi/backends/vulkan/runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ void CompiledTaichiKernel::generate_command_list(
// Constructs the runtime from `params`, keeping the device and the host result
// buffer it carries. The host result buffer must be non-null.
VkRuntime::VkRuntime(const Params &params)
: device_(params.device), host_result_buffer_(params.host_result_buffer) {
TI_ASSERT(host_result_buffer_ != nullptr);
// Start the pending-time clock now so the first elapsed-time check in
// launch_kernel() measures from construction rather than from a
// default-constructed time_point.
current_cmdlist_pending_since_ = high_res_clock::now();
init_buffers();
}

Expand Down Expand Up @@ -536,6 +537,7 @@ void VkRuntime::launch_kernel(KernelHandle handle, RuntimeContext *host_ctx) {
// Create new command list if current one is nullptr
if (!current_cmdlist_) {
ctx_buffers_.clear();
current_cmdlist_pending_since_ = high_res_clock::now();
current_cmdlist_ = device_->get_compute_stream()->new_command_list();
}

Expand All @@ -560,6 +562,19 @@ void VkRuntime::launch_kernel(KernelHandle handle, RuntimeContext *host_ctx) {
}
}

// If we have accumulated some work that does not require a sync,
// and the accumulated cmdlist has been pending for some time,
// submit the cmdlist so the device can start processing it.
if (current_cmdlist_) {
constexpr uint64_t max_pending_time = 2000; // 2000us = 2ms
auto duration = high_res_clock::now() - current_cmdlist_pending_since_;
if (std::chrono::duration_cast<std::chrono::microseconds>(duration)
.count() > max_pending_time) {
device_->get_compute_stream()->submit(current_cmdlist_.get());
current_cmdlist_ = nullptr;
}
}

// Dealloc external arrays
for (auto pair : any_arrays) {
if (pair.second != kDeviceNullAllocation) {
Expand Down
4 changes: 4 additions & 0 deletions taichi/backends/vulkan/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "taichi/lang_util.h"

#include <vector>
#include <chrono>

#include "taichi/backends/device.h"
#include "taichi/codegen/spirv/snode_struct_compiler.h"
Expand All @@ -22,6 +23,8 @@ using BufferInfo = TaskAttributes::BufferInfo;
using BufferBind = TaskAttributes::BufferBind;
using BufferInfoHasher = TaskAttributes::BufferInfoHasher;

using high_res_clock = std::chrono::high_resolution_clock;

// TODO: In the future this isn't necessarily a pointer, since DeviceAllocation
// is already a pretty cheap handle.
using InputBuffersMap =
Expand Down Expand Up @@ -123,6 +126,7 @@ class TI_DLL_EXPORT VkRuntime {
std::vector<std::unique_ptr<DeviceAllocationGuard>> ctx_buffers_;

std::unique_ptr<CommandList> current_cmdlist_{nullptr};
high_res_clock::time_point current_cmdlist_pending_since_;

std::vector<std::unique_ptr<CompiledTaichiKernel>> ti_kernels_;

Expand Down
36 changes: 29 additions & 7 deletions taichi/backends/vulkan/vulkan_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,9 @@ void VulkanResourceBinder::rw_buffer(uint32_t set,
TI_WARN("Overriding last binding");
}
}
bindings[binding] = {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ptr, size};

Binding new_binding = {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ptr, size};
bindings[binding] = new_binding;
}

void VulkanResourceBinder::rw_buffer(uint32_t set,
Expand All @@ -536,7 +538,9 @@ void VulkanResourceBinder::buffer(uint32_t set,
TI_WARN("Overriding last binding");
}
}
bindings[binding] = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, ptr, size};

Binding new_binding = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, ptr, size};
bindings[binding] = new_binding;
}

void VulkanResourceBinder::buffer(uint32_t set,
Expand Down Expand Up @@ -676,6 +680,9 @@ VulkanCommandList::~VulkanCommandList() {
void VulkanCommandList::bind_pipeline(Pipeline *p) {
auto pipeline = static_cast<VulkanPipeline *>(p);

if (current_pipeline_ == pipeline)
return;

if (pipeline->is_graphics()) {
vkapi::IVkPipeline vk_pipeline = pipeline->graphics_pipeline(
current_renderpass_desc_, current_renderpass_);
Expand Down Expand Up @@ -712,10 +719,23 @@ void VulkanCommandList::bind_resources(ResourceBinder *ti_binder) {
VulkanResourceBinder *binder = static_cast<VulkanResourceBinder *>(ti_binder);

for (auto &pair : binder->get_sets()) {
VkPipelineLayout pipeline_layout =
current_pipeline_->pipeline_layout()->layout;

vkapi::IVkDescriptorSetLayout layout =
ti_device_->get_desc_set_layout(pair.second);
vkapi::IVkDescriptorSet set = ti_device_->alloc_desc_set(layout);
binder->write_to_set(pair.first, *ti_device_, set);

vkapi::IVkDescriptorSet set = nullptr;

if (currently_used_sets_.find(pair.second) != currently_used_sets_.end()) {
set = currently_used_sets_.at(pair.second);
}

if (!set) {
set = ti_device_->alloc_desc_set(layout);
binder->write_to_set(pair.first, *ti_device_, set);
currently_used_sets_[pair.second] = set;
}

VkPipelineBindPoint bind_point;
if (current_pipeline_->is_graphics()) {
Expand All @@ -724,8 +744,7 @@ void VulkanCommandList::bind_resources(ResourceBinder *ti_binder) {
bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
}

vkCmdBindDescriptorSets(buffer_->buffer, bind_point,
current_pipeline_->pipeline_layout()->layout,
vkCmdBindDescriptorSets(buffer_->buffer, bind_point, pipeline_layout,
/*firstSet=*/0,
/*descriptorSetCount=*/1, &set->set,
/*dynamicOffsetCount=*/0,
Expand Down Expand Up @@ -1799,7 +1818,10 @@ vkapi::IVkDescriptorSetLayout VulkanDevice::get_desc_set_layout(
create_info.bindingCount = bindings.size();
create_info.pBindings = bindings.data();

return vkapi::create_descriptor_set_layout(device_, &create_info);
auto layout = vkapi::create_descriptor_set_layout(device_, &create_info);
desc_set_layouts_[set] = layout;

return layout;
} else {
return desc_set_layouts_.at(set);
}
Expand Down
64 changes: 63 additions & 1 deletion taichi/backends/vulkan/vulkan_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,15 @@ class VulkanResourceBinder : public ResourceBinder {
DevicePtr ptr;
VkDeviceSize size;
VkSampler sampler{VK_NULL_HANDLE}; // used only for images

// Field-wise equality: two bindings are equal only when their descriptor
// type, device pointer, size and sampler all match. Checks run in that order
// and stop at the first mismatch.
bool operator==(const Binding &other) const {
  if (!(other.type == type)) {
    return false;
  }
  if (!(other.ptr == ptr)) {
    return false;
  }
  if (!(other.size == size)) {
    return false;
  }
  return other.sampler == sampler;
}

// Inequality, defined as the exact negation of operator== on Binding.
bool operator!=(const Binding &other) const {
  const bool is_equal = (*this == other);
  return !is_equal;
}
};

struct Set {
Expand All @@ -113,13 +122,21 @@ class VulkanResourceBinder : public ResourceBinder {
return false;
}
for (auto &pair : bindings) {
const Binding &other_binding = other.bindings.at(pair.first);
auto other_binding_iter = other.bindings.find(pair.first);
if (other_binding_iter == other.bindings.end()) {
return false;
}
const Binding &other_binding = other_binding_iter->second;
if (other_binding.type != pair.second.type) {
return false;
}
}
return true;
}

// Inequality, defined as the exact negation of operator== on Set.
bool operator!=(const Set &other) const {
  const bool is_equal = (*this == other);
  return !is_equal;
}
};

struct SetLayoutHasher {
Expand All @@ -133,6 +150,45 @@ class VulkanResourceBinder : public ResourceBinder {
}
};

// Equality predicate for descriptor sets, used as the key-equality functor of
// the descriptor-set cache map. Two sets are equal when they hold the same
// number of bindings and every binding index in `a` is present in `b` with a
// Binding that compares equal (type, pointer, size and sampler).
struct DescSetCmp {
  bool operator()(const Set &a, const Set &b) const {
    // Different binding counts can never match.
    if (a.bindings.size() != b.bindings.size()) {
      return false;
    }
    for (const auto &entry : a.bindings) {
      const auto match = b.bindings.find(entry.first);
      // Missing index in `b`, or same index but a different binding.
      if (match == b.bindings.end() || match->second != entry.second) {
        return false;
      }
    }
    return true;
  }
};

struct DescSetHasher {
std::size_t operator()(const Set &set) const {
// TODO: Come up with a better hash
size_t hash = 0;
for (const auto &pair : set.bindings) {
size_t binding_hash = 0;
uint32_t *u32_ptr = (uint32_t *)&pair.second;
for (int i = 0; i < sizeof(Set) / sizeof(uint32_t); i++) {
binding_hash = binding_hash ^ u32_ptr[i];
binding_hash = (binding_hash << 7) | (binding_hash >> (64 - 7));
}
binding_hash = binding_hash ^ pair.first;
binding_hash =
(binding_hash << pair.first) | (binding_hash >> (64 - pair.first));
hash = hash ^ binding_hash;
}
return hash;
}
};

struct VulkanBindings : public Bindings {
std::vector<
std::pair<vkapi::IVkDescriptorSetLayout, vkapi::IVkDescriptorSet>>
Expand Down Expand Up @@ -353,6 +409,12 @@ class VulkanCommandList : public CommandList {
vkapi::IVkCommandBuffer buffer_;
VulkanPipeline *current_pipeline_{nullptr};

std::unordered_map<VulkanResourceBinder::Set,
vkapi::IVkDescriptorSet,
VulkanResourceBinder::DescSetHasher,
VulkanResourceBinder::DescSetCmp>
currently_used_sets_;

// Renderpass & raster pipeline
VulkanRenderPassDesc current_renderpass_desc_;
vkapi::IVkRenderPass current_renderpass_{VK_NULL_HANDLE};
Expand Down
3 changes: 3 additions & 0 deletions taichi/codegen/spirv/spirv_ir_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -893,10 +893,13 @@ Value IRBuilder::float_atomic(AtomicOpType op_type,
Value new_float = atomic_op(old_float, data);
Value new_val = make_value(spv::OpBitcast, t_uint32_, new_float);
// int loaded = atomicCompSwap(vals[0], old, new);
/*
* Don't need this part, theoretically
auto semantics = uint_immediate_number(
t_uint32_, spv::MemorySemanticsAcquireReleaseMask |
spv::MemorySemanticsUniformMemoryMask);
make_inst(spv::OpMemoryBarrier, const_i32_one_, semantics);
*/
Value loaded = make_value(
spv::OpAtomicCompareExchange, t_uint32_, addr_ptr,
/*scope=*/const_i32_one_, /*semantics if equal=*/const_i32_zero_,
Expand Down

0 comments on commit 063d630

Please sign in to comment.