From 1262a70a9080537be170ceb405bc9ae38a64e7c0 Mon Sep 17 00:00:00 2001 From: Mocki <34432001+Morcki@users.noreply.github.com> Date: Tue, 20 Sep 2022 09:22:45 +0800 Subject: [PATCH] [bug] [gui] Fix a bug of drawing mesh instacing that cpu/cuda objects have an offset when copying to vulkan object (#6028) This only happend on Windows. --- python/taichi/ui/scene.py | 12 +++++++---- taichi/rhi/device.cpp | 9 ++++++++- taichi/rhi/interop/vulkan_cpu_interop.cpp | 20 +++++++++++++++++++ taichi/rhi/interop/vulkan_cpu_interop.h | 2 ++ .../ui/backends/vulkan/renderables/mesh.cpp | 19 +++++++++++------- tests/python/test_ggui.py | 8 -------- 6 files changed, 50 insertions(+), 20 deletions(-) diff --git a/python/taichi/ui/scene.py b/python/taichi/ui/scene.py index 46f95deb413a4..a058ada09ddb8 100644 --- a/python/taichi/ui/scene.py +++ b/python/taichi/ui/scene.py @@ -307,10 +307,14 @@ def mesh_instance(self, index_count = vertex_count else: index_count = indices.shape[0] - if instance_count is None: - instance_count = transforms.shape[0] - if transforms and (transforms.m != 4 or transforms.n != 4): - raise Exception("Error! Transform matrix must be 4x4 shape") + if transforms: + if (transforms.m != 4 or transforms.n != 4): + raise Exception("Error! Transform matrix must be 4x4 shape") + if instance_count is None: + instance_count = transforms.shape[0] + else: + instance_count = 1 + copy_normals_to_vbo(vbo, normals) vbo_info = get_field_info(vbo) indices_info = get_field_info(indices) diff --git a/taichi/rhi/device.cpp b/taichi/rhi/device.cpp index 460cbba420c40..1d3af12cd8022 100644 --- a/taichi/rhi/device.cpp +++ b/taichi/rhi/device.cpp @@ -244,7 +244,14 @@ void Device::memcpy_direct(DevicePtr dst, DevicePtr src, uint64_t size) { dst.device->memcpy_internal(dst, src, size); return; } - // Inter-device copy +#if TI_WITH_VULKAN && TI_WITH_LLVM + // cross-device copy directly + else if (dynamic_cast(dst.device) && + dynamic_cast(src.device)) { + memcpy_cpu_to_vulkan(dst, src, size); + return; + } +#endif #if TI_WITH_VULKAN && TI_WITH_CUDA if (dynamic_cast(dst.device) && dynamic_cast(src.device)) { diff --git a/taichi/rhi/interop/vulkan_cpu_interop.cpp b/taichi/rhi/interop/vulkan_cpu_interop.cpp index e3a1158457cee..598e833977eee 100644 --- a/taichi/rhi/interop/vulkan_cpu_interop.cpp +++ b/taichi/rhi/interop/vulkan_cpu_interop.cpp @@ -15,6 +15,23 @@ namespace lang { using namespace taichi::lang::vulkan; using namespace taichi::lang::cpu; +void memcpy_cpu_to_vulkan(DevicePtr dst, DevicePtr src, uint64_t size) { + // Note that `dst` must point to host-visible memory, if `dst` point to + // device-local memory, please choose to use `memcpy_via_staging`. + VulkanDevice *vk_dev = dynamic_cast(dst.device); + CpuDevice *cpu_dev = dynamic_cast(src.device); + + DeviceAllocation src_alloc(src); + + CpuDevice::AllocInfo src_alloc_info = cpu_dev->get_alloc_info(src_alloc); + + unsigned char *dst_ptr = (unsigned char *)(vk_dev->map_range(dst, size)); + unsigned char *src_ptr = (unsigned char *)src_alloc_info.ptr + src.offset; + + memcpy(dst_ptr, src_ptr, size); + vk_dev->unmap(dst); +} + void memcpy_cpu_to_vulkan_via_staging(DevicePtr dst, DevicePtr staging, DevicePtr src, @@ -39,6 +56,9 @@ void memcpy_cpu_to_vulkan_via_staging(DevicePtr dst, } #else +void memcpy_cpu_to_vulkan(DevicePtr dst, DevicePtr src, uint64_t size) { + TI_NOT_IMPLEMENTED; +} void memcpy_cpu_to_vulkan_via_staging(DevicePtr dst, DevicePtr stagin, DevicePtr src, diff --git a/taichi/rhi/interop/vulkan_cpu_interop.h b/taichi/rhi/interop/vulkan_cpu_interop.h index ac95959f0251f..4042ceca80be9 100644 --- a/taichi/rhi/interop/vulkan_cpu_interop.h +++ b/taichi/rhi/interop/vulkan_cpu_interop.h @@ -5,6 +5,8 @@ namespace taichi { namespace lang { +void memcpy_cpu_to_vulkan(DevicePtr dst, DevicePtr src, uint64_t size); + void memcpy_cpu_to_vulkan_via_staging(DevicePtr dst, DevicePtr staging, DevicePtr src, diff --git a/taichi/ui/backends/vulkan/renderables/mesh.cpp b/taichi/ui/backends/vulkan/renderables/mesh.cpp index e82b62aefc199..66423e09efa90 100644 --- a/taichi/ui/backends/vulkan/renderables/mesh.cpp +++ b/taichi/ui/backends/vulkan/renderables/mesh.cpp @@ -88,18 +88,22 @@ void Mesh::update_data(const MeshInfo &info, const Scene &scene) { attr_dev_ptr = get_device_ptr(prog, info.mesh_attribute_info.mesh_attribute.snode); } + // TODO : At present, we donnot support copying from cuda device memory to a + // host-visible vulkan device memory directly on Windows platform, which is + // not a ideal way for handling storage buffer. So here we set the + // `mesh_ssbo` vulkan buffer as device-local memory using staging buffer + // filling data. However, that is not what is used to do for a storage + // buffer (usually set as host-visible memory), we should f`ix this on + // Windows in future. Device::MemcpyCapability memcpy_cap = Device::check_memcpy_capability( mesh_storage_buffer_.get_ptr(), attr_dev_ptr, mesh_ssbo_size_); if (memcpy_cap == Device::MemcpyCapability::Direct) { Device::memcpy_direct(mesh_storage_buffer_.get_ptr(), attr_dev_ptr, mesh_ssbo_size_); } else if (memcpy_cap == Device::MemcpyCapability::RequiresStagingBuffer) { - void *ssbo_mapped = app_context_->device().map(mesh_storage_buffer_); - DeviceAllocation attr_buffer(attr_dev_ptr); - void *attr_mapped = attr_dev_ptr.device->map(attr_buffer); - memcpy(ssbo_mapped, attr_mapped, mesh_ssbo_size_); - app_context_->device().unmap(mesh_storage_buffer_); - attr_dev_ptr.device->unmap(attr_buffer); + Device::memcpy_via_staging(mesh_storage_buffer_.get_ptr(), + staging_vertex_buffer_.get_ptr(), attr_dev_ptr, + mesh_ssbo_size_); } else { TI_NOT_IMPLEMENTED; } @@ -163,7 +167,8 @@ void Mesh::create_mesh_storage_buffers() { if (mesh_ssbo_size_ == 0) { mesh_ssbo_size_ = 4 * 4 * sizeof(float); } - Device::AllocParams sb_params{mesh_ssbo_size_, true, false, true, + Device::AllocParams sb_params{mesh_ssbo_size_, false, false, + app_context_->requires_export_sharing(), AllocUsage::Storage}; mesh_storage_buffer_ = app_context_->device().allocate_memory(sb_params); } diff --git a/tests/python/test_ggui.py b/tests/python/test_ggui.py index d4a1bb606f26c..4550dcccf1090 100644 --- a/tests/python/test_ggui.py +++ b/tests/python/test_ggui.py @@ -753,10 +753,6 @@ def render(): transforms=instances_transforms) canvas.scene(scene) - if (platform.system() == 'Windows'): - # FIXME:Fix the bug that drawing mesh instance report bugs on Windows - return - for i in range(30): update_transform(30) render() @@ -868,10 +864,6 @@ def render(): instance_offset=2) canvas.scene(scene) - if (platform.system() == 'Windows'): - # FIXME:Fix the bug that drawing mesh instance report bugs on Windows - return - for _ in range(RENDER_REPEAT): render() window.get_image_buffer_as_numpy()