From adabd14d08e89d02d7fd11135e5265f47798a5d4 Mon Sep 17 00:00:00 2001 From: Dario Date: Wed, 24 Apr 2024 14:30:48 -0300 Subject: [PATCH] Add support for enhanced barriers in D3D12. Enables support for enhanced barriers if available. Gets rid of the implementation of [CROSS_FAMILY_FALLBACK] in the D3D12 driver. The logic has been reimplemented at a higher level in RenderingDevice itself. This fallback is only used if the RenderingDeviceDriver reports the API traits and the capability of sharing texture formats correctly. Aliases created in this way can only be used for sampling: never for writing. In most cases, the formats that do not support sharing do not support unordered access/storage writes in the first place. --- .../d3d12/rendering_device_driver_d3d12.cpp | 922 ++++++++++++------ drivers/d3d12/rendering_device_driver_d3d12.h | 31 +- .../vulkan/rendering_device_driver_vulkan.cpp | 121 ++- .../vulkan/rendering_device_driver_vulkan.h | 1 + servers/rendering/rendering_device.cpp | 354 ++++++- servers/rendering/rendering_device.h | 39 + servers/rendering/rendering_device_driver.cpp | 2 + servers/rendering/rendering_device_driver.h | 26 +- servers/rendering/rendering_device_graph.cpp | 141 ++- servers/rendering/rendering_device_graph.h | 26 +- 10 files changed, 1207 insertions(+), 456 deletions(-) diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index 9407826ebfa6..08ee12991ab6 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -538,15 +538,6 @@ void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_reso #endif ResourceInfo::States *res_states = p_resource->states_ptr; - - if (p_new_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) { - if (unlikely(!res_states->xfamily_fallback.subresources_dirty.is_empty())) { - uint32_t subres_qword = p_subresource / 64; - uint64_t subres_mask = (uint64_t(1) << (p_subresource % 
64)); - res_states->xfamily_fallback.subresources_dirty[subres_qword] |= subres_mask; - } - } - D3D12_RESOURCE_STATES *curr_state = &res_states->subresource_states[p_subresource]; // Transitions can be considered redundant if the current state has all the bits of the new state. @@ -869,7 +860,7 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel // but also if you give a rounded size at that point because it will extend beyond the // memory of the resource. Therefore, it seems the only way is to create it with a // rounded size. - CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(STEPIFY(p_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)); + CD3DX12_RESOURCE_DESC1 resource_desc = CD3DX12_RESOURCE_DESC1::Buffer(STEPIFY(p_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)); if (p_usage.has_flag(RDD::BUFFER_USAGE_STORAGE_BIT)) { resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; } else { @@ -878,7 +869,6 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel D3D12MA::ALLOCATION_DESC allocation_desc = {}; allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT; - D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST; switch (p_allocation_type) { case MEMORY_ALLOCATION_TYPE_CPU: { bool is_src = p_usage.has_flag(BUFFER_USAGE_TRANSFER_FROM_BIT); @@ -886,7 +876,6 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel if (is_src && !is_dst) { // Looks like a staging buffer: CPU maps, writes sequentially, then GPU copies to VRAM. allocation_desc.HeapType = D3D12_HEAP_TYPE_UPLOAD; - initial_state = D3D12_RESOURCE_STATE_GENERIC_READ; } if (is_dst && !is_src) { // Looks like a readback buffer: GPU copies from VRAM, then CPU maps and reads. 
@@ -904,13 +893,27 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel ComPtr buffer; ComPtr allocation; - HRESULT res = allocator->CreateResource( - &allocation_desc, - &resource_desc, - initial_state, - nullptr, - allocation.GetAddressOf(), - IID_PPV_ARGS(buffer.GetAddressOf())); + HRESULT res; + if (barrier_capabilities.enhanced_barriers_supported) { + res = allocator->CreateResource3( + &allocation_desc, + &resource_desc, + D3D12_BARRIER_LAYOUT_UNDEFINED, + nullptr, + 0, + nullptr, + allocation.GetAddressOf(), + IID_PPV_ARGS(buffer.GetAddressOf())); + } else { + res = allocator->CreateResource( + &allocation_desc, + reinterpret_cast(&resource_desc), + D3D12_RESOURCE_STATE_COMMON, + nullptr, + allocation.GetAddressOf(), + IID_PPV_ARGS(buffer.GetAddressOf())); + } + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), BufferID(), "Can't create buffer of size: " + itos(p_size) + ", error " + vformat("0x%08ux", (uint64_t)res) + "."); // Bookkeep. @@ -919,11 +922,10 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel buf_info->resource = buffer.Get(); buf_info->owner_info.resource = buffer; buf_info->owner_info.allocation = allocation; - buf_info->owner_info.states.subresource_states.push_back(initial_state); + buf_info->owner_info.states.subresource_states.push_back(D3D12_RESOURCE_STATE_COMMON); buf_info->states_ptr = &buf_info->owner_info.states; buf_info->size = p_size; buf_info->flags.usable_as_uav = (resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); - buf_info->flags.is_for_upload = allocation_desc.HeapType == D3D12_HEAP_TYPE_UPLOAD; return BufferID(buf_info); } @@ -1052,8 +1054,7 @@ UINT RenderingDeviceDriverD3D12::_compute_plane_slice(DataFormat p_format, BitFi if (p_aspect_bits.has_flag(TEXTURE_ASPECT_DEPTH_BIT)) { DEV_ASSERT(aspect == TEXTURE_ASPECT_MAX); aspect = TEXTURE_ASPECT_DEPTH; - } - if (p_aspect_bits.has_flag(TEXTURE_ASPECT_STENCIL_BIT)) { + } else if 
(p_aspect_bits.has_flag(TEXTURE_ASPECT_STENCIL_BIT)) { DEV_ASSERT(aspect == TEXTURE_ASPECT_MAX); aspect = TEXTURE_ASPECT_STENCIL; } @@ -1080,6 +1081,10 @@ UINT RenderingDeviceDriverD3D12::_compute_plane_slice(DataFormat p_format, Textu } } +UINT RenderingDeviceDriverD3D12::_compute_subresource_from_layers(TextureInfo *p_texture, const TextureSubresourceLayers &p_layers, uint32_t p_layer_offset) { + return D3D12CalcSubresource(p_layers.mipmap, p_layers.base_layer + p_layer_offset, _compute_plane_slice(p_texture->format, p_layers.aspect), p_texture->desc.MipLevels, p_texture->desc.ArraySize()); +} + void RenderingDeviceDriverD3D12::_discard_texture_subresources(const TextureInfo *p_tex_info, const CommandBufferInfo *p_cmd_buf_info) { uint32_t planes = 1; if ((p_tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) { @@ -1117,6 +1122,64 @@ void RenderingDeviceDriverD3D12::_discard_texture_subresources(const TextureInfo } } +bool RenderingDeviceDriverD3D12::_unordered_access_supported_by_format(DataFormat p_format) { + switch (p_format) { + case DATA_FORMAT_R4G4_UNORM_PACK8: + case DATA_FORMAT_R4G4B4A4_UNORM_PACK16: + case DATA_FORMAT_B4G4R4A4_UNORM_PACK16: + case DATA_FORMAT_R5G6B5_UNORM_PACK16: + case DATA_FORMAT_B5G6R5_UNORM_PACK16: + case DATA_FORMAT_R5G5B5A1_UNORM_PACK16: + case DATA_FORMAT_B5G5R5A1_UNORM_PACK16: + case DATA_FORMAT_A1R5G5B5_UNORM_PACK16: + case DATA_FORMAT_A8B8G8R8_UNORM_PACK32: + case DATA_FORMAT_A8B8G8R8_SNORM_PACK32: + case DATA_FORMAT_A8B8G8R8_USCALED_PACK32: + case DATA_FORMAT_A8B8G8R8_SSCALED_PACK32: + case DATA_FORMAT_A8B8G8R8_UINT_PACK32: + case DATA_FORMAT_A8B8G8R8_SINT_PACK32: + case DATA_FORMAT_A8B8G8R8_SRGB_PACK32: + case DATA_FORMAT_A2R10G10B10_UNORM_PACK32: + case DATA_FORMAT_A2R10G10B10_SNORM_PACK32: + case DATA_FORMAT_A2R10G10B10_USCALED_PACK32: + case DATA_FORMAT_A2R10G10B10_SSCALED_PACK32: + case DATA_FORMAT_A2R10G10B10_UINT_PACK32: + case DATA_FORMAT_A2R10G10B10_SINT_PACK32: + case 
DATA_FORMAT_A2B10G10R10_UNORM_PACK32: + case DATA_FORMAT_A2B10G10R10_SNORM_PACK32: + case DATA_FORMAT_A2B10G10R10_USCALED_PACK32: + case DATA_FORMAT_A2B10G10R10_SSCALED_PACK32: + case DATA_FORMAT_A2B10G10R10_UINT_PACK32: + case DATA_FORMAT_A2B10G10R10_SINT_PACK32: + case DATA_FORMAT_B10G11R11_UFLOAT_PACK32: + case DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32: + case DATA_FORMAT_X8_D24_UNORM_PACK32: + case DATA_FORMAT_R10X6_UNORM_PACK16: + case DATA_FORMAT_R10X6G10X6_UNORM_2PACK16: + case DATA_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16: + case DATA_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16: + case DATA_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16: + case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16: + case DATA_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16: + case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16: + case DATA_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16: + case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16: + case DATA_FORMAT_R12X4_UNORM_PACK16: + case DATA_FORMAT_R12X4G12X4_UNORM_2PACK16: + case DATA_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16: + case DATA_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16: + case DATA_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16: + case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16: + case DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16: + case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16: + case DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16: + case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16: + return false; + default: + return true; + } +} + RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p_format, const TextureView &p_view) { // Using D3D12_RESOURCE_DESC1. Thanks to the layout, it's sliceable down to D3D12_RESOURCE_DESC if needed. 
CD3DX12_RESOURCE_DESC1 resource_desc = {}; @@ -1137,12 +1200,10 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p resource_desc.Format = RD_TO_D3D12_FORMAT[p_format.format].family; // If views of different families are wanted, special setup is needed for proper sharing among them. - // Two options here: - // 1. If the driver reports relaxed casting is, leverage its new extended resource creation API (via D3D12MA). - // 2. Otherwise, fall back to an approach based on having multiple versions of the resource and copying as needed. [[CROSS_FAMILY_FALLBACK]] + // If the driver reports relaxed casting is supported, leverage its new extended resource creation API (via D3D12MA). if (p_format.shareable_formats.size() && format_capabilities.relaxed_casting_supported) { relaxed_casting_available = true; - relaxed_casting_formats = ALLOCA_ARRAY(DXGI_FORMAT, p_format.shareable_formats.size()); + relaxed_casting_formats = ALLOCA_ARRAY(DXGI_FORMAT, p_format.shareable_formats.size() + 1); relaxed_casting_formats[0] = RD_TO_D3D12_FORMAT[p_format.format].general_format; relaxed_casting_format_count++; } @@ -1156,9 +1217,9 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p if (RD_TO_D3D12_FORMAT[curr_format].family != RD_TO_D3D12_FORMAT[p_format.format].family) { cross_family_sharing = true; - if (!relaxed_casting_available) { - break; - } + } + + if (relaxed_casting_available) { relaxed_casting_formats[relaxed_casting_format_count] = RD_TO_D3D12_FORMAT[curr_format].general_format; relaxed_casting_format_count++; } @@ -1185,7 +1246,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p if ((p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT)) { resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; } else { - if ((p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_TO_BIT)) { + if ((p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_TO_BIT) &&
_unordered_access_supported_by_format(p_format.format)) { resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; // For clearing via UAV. } } @@ -1242,17 +1303,19 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p D3D12_CLEAR_VALUE *clear_value_ptr = (resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? &clear_value : nullptr; { HRESULT res = E_FAIL; - if (cross_family_sharing && relaxed_casting_available) { + if (barrier_capabilities.enhanced_barriers_supported || (cross_family_sharing && relaxed_casting_available)) { + // Create with undefined layout if enhanced barriers are supported. Leave as common otherwise for interop with legacy barriers. + D3D12_BARRIER_LAYOUT initial_layout = barrier_capabilities.enhanced_barriers_supported ? D3D12_BARRIER_LAYOUT_UNDEFINED : D3D12_BARRIER_LAYOUT_COMMON; res = allocator->CreateResource3( &allocation_desc, &resource_desc, - D3D12_BARRIER_LAYOUT_COMMON, // Needed for barrier interop. + initial_layout, clear_value_ptr, relaxed_casting_format_count, relaxed_casting_formats, allocation.GetAddressOf(), IID_PPV_ARGS(main_texture.GetAddressOf())); - initial_state = D3D12_RESOURCE_STATE_COMMON; // Needed for barrier interop. + initial_state = D3D12_RESOURCE_STATE_COMMON; } else { res = allocator->CreateResource( &allocation_desc, @@ -1353,7 +1416,10 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p tex_info->mipmaps = resource_desc.MipLevels; tex_info->view_descs.srv = srv_desc; tex_info->view_descs.uav = uav_desc; - if ((p_format.usage_bits & (TEXTURE_USAGE_STORAGE_BIT | TEXTURE_USAGE_COLOR_ATTACHMENT_BIT))) { + + if (!barrier_capabilities.enhanced_barriers_supported && (p_format.usage_bits & (TEXTURE_USAGE_STORAGE_BIT | TEXTURE_USAGE_COLOR_ATTACHMENT_BIT))) { + // Fallback to clear resources when they're first used in a uniform set. 
Not necessary if enhanced barriers + // are supported, as the discard flag will be used instead when transitioning from an undefined layout. textures_pending_clear.add(&tex_info->pending_clear); } @@ -1380,45 +1446,8 @@ RDD::TextureID RenderingDeviceDriverD3D12::_texture_create_shared_from_slice(Tex ComPtr new_texture; ComPtr new_allocation; - ID3D12Resource *resource = nullptr; + ID3D12Resource *resource = owner_tex_info->resource; CD3DX12_RESOURCE_DESC new_tex_resource_desc = owner_tex_info->desc; - bool cross_family = RD_TO_D3D12_FORMAT[p_view.format].family != RD_TO_D3D12_FORMAT[owner_tex_info->format].family; - if (cross_family && !format_capabilities.relaxed_casting_supported) { - // [[CROSS_FAMILY_FALLBACK]]. - // We have to create a new texture of the alternative format. - - D3D12MA::ALLOCATION_DESC allocation_desc = {}; - allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT; - allocation_desc.ExtraHeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES; - - if (p_slice_type != -1) { -#ifdef DEV_ENABLED - // Actual slicing is not contemplated. If ever needed, let's at least realize. - if (p_slice_type != -1) { - uint32_t new_texture_subresorce_count = owner_tex_info->mipmaps * owner_tex_info->layers; - uint32_t slice_subresorce_count = p_mipmaps * p_layers; - DEV_ASSERT(new_texture_subresorce_count == slice_subresorce_count); - } -#endif - new_tex_resource_desc.DepthOrArraySize = p_layers; - new_tex_resource_desc.MipLevels = p_mipmaps; - } - new_tex_resource_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].family; - new_tex_resource_desc.Flags = D3D12_RESOURCE_FLAG_NONE; // Alternative formats can only be used as SRVs. 
- - HRESULT res = allocator->CreateResource( - &allocation_desc, - &new_tex_resource_desc, - D3D12_RESOURCE_STATE_COPY_DEST, - nullptr, - new_allocation.GetAddressOf(), - IID_PPV_ARGS(new_texture.GetAddressOf())); - ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), TextureID(), vformat("D3D12MA::CreateResource failed with error 0x%08ux.", (uint64_t)res)); - - resource = new_texture.Get(); - } else { - resource = owner_tex_info->resource; - } // Describe views. @@ -1528,58 +1557,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::_texture_create_shared_from_slice(Tex TextureInfo *tex_info = VersatileResource::allocate(resources_allocator); tex_info->resource = resource; - if (new_texture.Get()) { - // [[CROSS_FAMILY_FALLBACK]]. - - DEV_ASSERT(cross_family && !format_capabilities.relaxed_casting_supported); - - uint32_t new_texture_subresorce_count = owner_tex_info->mipmaps * owner_tex_info->layers; -#ifdef DEV_ENABLED - // Actual slicing is not contemplated. If ever needed, let's at least realize. - if (p_slice_type != -1) { - uint32_t slice_subresorce_count = p_mipmaps * p_layers; - DEV_ASSERT(new_texture_subresorce_count == slice_subresorce_count); - } -#endif - - tex_info->owner_info.resource = new_texture; - tex_info->owner_info.allocation = new_allocation; - tex_info->owner_info.states.subresource_states.resize(new_texture_subresorce_count); - for (uint32_t i = 0; i < tex_info->owner_info.states.subresource_states.size(); i++) { - tex_info->owner_info.states.subresource_states[i] = D3D12_RESOURCE_STATE_COPY_DEST; - } - tex_info->states_ptr = &tex_info->owner_info.states; - - ResourceInfo::States::CrossFamillyFallback &xfamily = owner_tex_info->owner_info.states.xfamily_fallback; - if (xfamily.subresources_dirty.is_empty()) { - uint32_t items_required = STEPIFY(new_texture_subresorce_count, sizeof(uint64_t)) / sizeof(uint64_t); - xfamily.subresources_dirty.resize(items_required); - memset(xfamily.subresources_dirty.ptr(), 255, sizeof(uint64_t) * 
xfamily.subresources_dirty.size()); - - // Create buffer for non-direct copy if it's a format not supporting reinterpret-copy. - DEV_ASSERT(!xfamily.interim_buffer.Get()); - if (owner_tex_info->format == DATA_FORMAT_R16_UINT && p_view.format == DATA_FORMAT_R4G4B4A4_UNORM_PACK16) { - uint32_t row_pitch = STEPIFY(owner_tex_info->desc.Width * sizeof(uint16_t), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - uint32_t buffer_size = sizeof(uint16_t) * row_pitch * owner_tex_info->desc.Height * owner_tex_info->desc.Depth(); - CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(STEPIFY(buffer_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)); - resource_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; - - D3D12MA::ALLOCATION_DESC allocation_desc = {}; - allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT; - - HRESULT res = allocator->CreateResource( - &allocation_desc, - &resource_desc, - D3D12_RESOURCE_STATE_COPY_SOURCE, // Makes the code that makes the copy easier. - nullptr, - xfamily.interim_buffer_alloc.GetAddressOf(), - IID_PPV_ARGS(xfamily.interim_buffer.GetAddressOf())); - ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), TextureID(), "D3D12MA::CreateResource failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); - } - } - } else { - tex_info->states_ptr = owner_tex_info->states_ptr; - } + tex_info->states_ptr = owner_tex_info->states_ptr; tex_info->format = p_view.format; tex_info->desc = new_tex_resource_desc; if (p_slice_type == -1) { @@ -1710,6 +1688,28 @@ BitField RenderingDeviceDriverD3D12::texture_get_usages_s return supported; } +bool RenderingDeviceDriverD3D12::texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) { + r_raw_reinterpretation = false; + + if (format_capabilities.relaxed_casting_supported) { + // Relaxed casting is supported, there should be no need to check for format family compatibility. 
+ return true; + } else { + TextureInfo *tex_info = (TextureInfo *)p_texture.id; + if (tex_info->format == DATA_FORMAT_R16_UINT && p_format == DATA_FORMAT_R4G4B4A4_UNORM_PACK16) { + // Specific cases that require buffer reinterpretation. + r_raw_reinterpretation = true; + return false; + } else if (RD_TO_D3D12_FORMAT[tex_info->format].family != RD_TO_D3D12_FORMAT[p_format].family) { + // Format family is different but copying resources directly is possible. + return false; + } else { + // Format family is the same and the view can just cast the format. + return true; + } + } +} + /*****************/ /**** SAMPLER ****/ /*****************/ @@ -1842,20 +1842,328 @@ void RenderingDeviceDriverD3D12::vertex_format_free(VertexFormatID p_vertex_form /**** BARRIERS ****/ /******************/ -void RenderingDeviceDriverD3D12::command_pipeline_barrier( - CommandBufferID p_cmd_buffer, - BitField p_src_stages, - BitField p_dst_stages, +static D3D12_BARRIER_ACCESS _rd_texture_layout_access_mask(RDD::TextureLayout p_texture_layout) { + switch (p_texture_layout) { + case RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL: + return D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; + case RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + return D3D12_BARRIER_ACCESS_RENDER_TARGET; + case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ | D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE; + case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: + return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ; + case RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + return D3D12_BARRIER_ACCESS_SHADER_RESOURCE; + case RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL: + return D3D12_BARRIER_ACCESS_COPY_SOURCE; + case RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL: + return D3D12_BARRIER_ACCESS_COPY_DEST; + case RDD::TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL: + return D3D12_BARRIER_ACCESS_RESOLVE_SOURCE; + case RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL: + return D3D12_BARRIER_ACCESS_RESOLVE_DEST; + case 
RDD::TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL: + return D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE; + default: + return D3D12_BARRIER_ACCESS_NO_ACCESS; + } +} + +static void _rd_access_to_d3d12_and_mask(BitField p_access, RDD::TextureLayout p_texture_layout, D3D12_BARRIER_ACCESS &r_access, D3D12_BARRIER_SYNC &r_sync_mask) { + r_access = D3D12_BARRIER_ACCESS_COMMON; + r_sync_mask = D3D12_BARRIER_SYNC_NONE; + + if (p_access.has_flag(RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT; + r_sync_mask |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_INDEX_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_INDEX_BUFFER; + r_sync_mask |= D3D12_BARRIER_SYNC_INDEX_INPUT | D3D12_BARRIER_SYNC_DRAW; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_VERTEX_BUFFER; + r_sync_mask |= D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_UNIFORM_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_CONSTANT_BUFFER; + r_sync_mask |= D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_COMPUTE_SHADING | + D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_INPUT_ATTACHMENT_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_RENDER_TARGET; + r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_RENDER_TARGET; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_COPY_SOURCE; + r_sync_mask |= D3D12_BARRIER_SYNC_COPY; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_WRITE_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_COPY_DEST; + r_sync_mask |= D3D12_BARRIER_SYNC_COPY; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_RESOLVE_SOURCE; + r_sync_mask |= 
D3D12_BARRIER_SYNC_RESOLVE; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_RESOLVE_DEST; + r_sync_mask |= D3D12_BARRIER_SYNC_RESOLVE; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE; + r_sync_mask |= D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_ALL_SHADING; + } + + const D3D12_BARRIER_SYNC unordered_access_mask = D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_COMPUTE_SHADING | + D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING | D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW; + + if (p_access.has_flag(RDD::BARRIER_ACCESS_STORAGE_CLEAR_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; + r_sync_mask |= unordered_access_mask; + } + + // These access bits only have compatibility with certain layouts unlike in Vulkan where they imply specific operations in the same layout. + if (p_access.has_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; + r_sync_mask |= unordered_access_mask; + } else if (p_access.has_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT)) { + if (p_texture_layout == RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL) { + // Unordered access must be enforced if the texture is using the storage layout. 
+ r_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; + r_sync_mask |= unordered_access_mask; + } else { + r_access |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE; + r_sync_mask |= D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_COMPUTE_SHADING | D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING; + } + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT) || p_access.has_flag(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_RENDER_TARGET; + r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_RENDER_TARGET; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE; + r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_DEPTH_STENCIL; + } else if (p_access.has_flag(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ; + r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_DEPTH_STENCIL; + } +} + +static void _rd_stages_to_d3d12(BitField p_stages, D3D12_BARRIER_SYNC &r_sync) { + if (p_stages.has_flag(RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT)) { + r_sync = D3D12_BARRIER_SYNC_ALL; + } else { + if (p_stages.has_flag(RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_INDEX_INPUT; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_VERTEX_SHADING; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) { + // There's no granularity for tessellation or geometry stages. The specification defines it as part of vertex shading. 
+ r_sync |= D3D12_BARRIER_SYNC_VERTEX_SHADING; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_PIXEL_SHADING; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT)) { + // Covers both read and write operations for depth stencil. + r_sync |= D3D12_BARRIER_SYNC_DEPTH_STENCIL; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_RENDER_TARGET; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_COMPUTE_SHADING; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_COPY_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_COPY; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_RESOLVE_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_RESOLVE; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_DRAW; + } + } +} + +static void _rd_stages_and_access_to_d3d12(BitField p_stages, RDD::TextureLayout p_texture_layout, BitField p_access, D3D12_BARRIER_SYNC &r_sync, D3D12_BARRIER_ACCESS &r_access) { + D3D12_BARRIER_SYNC sync_mask; + r_sync = D3D12_BARRIER_SYNC_NONE; + + if (p_texture_layout == RDD::TEXTURE_LAYOUT_UNDEFINED) { + // Undefined texture layouts are a special case where no access bits or synchronization scopes are allowed. + r_access = D3D12_BARRIER_ACCESS_NO_ACCESS; + return; + } + + // Convert access bits to the D3D12 barrier access bits. + _rd_access_to_d3d12_and_mask(p_access, p_texture_layout, r_access, sync_mask); + + if (p_texture_layout != RDD::TEXTURE_LAYOUT_MAX) { + // Only allow the access bits compatible with the texture layout. 
+ r_access &= _rd_texture_layout_access_mask(p_texture_layout); + } + + // Convert stage bits to the D3D12 synchronization scope bits. + _rd_stages_to_d3d12(p_stages, r_sync); + + // Only enable synchronization stages compatible with the access bits that were used. + r_sync &= sync_mask; + + if (r_sync == D3D12_BARRIER_SYNC_NONE) { + if (p_access.is_empty()) { + // No valid synchronization scope was defined and no access in particular is required. + r_access = D3D12_BARRIER_ACCESS_NO_ACCESS; + } else { + // Access is required but the synchronization scope wasn't compatible. We fall back to the global synchronization scope and access. + r_sync = D3D12_BARRIER_SYNC_ALL; + r_access = D3D12_BARRIER_ACCESS_COMMON; + } + } +} + +static D3D12_BARRIER_LAYOUT _rd_texture_layout_to_d3d12_barrier_layout(RDD::TextureLayout p_texture_layout) { + switch (p_texture_layout) { + case RDD::TEXTURE_LAYOUT_UNDEFINED: + return D3D12_BARRIER_LAYOUT_UNDEFINED; + case RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL: + return D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS; + case RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + return D3D12_BARRIER_LAYOUT_RENDER_TARGET; + case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + return D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE; + case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: + return D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ; + case RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + return D3D12_BARRIER_LAYOUT_SHADER_RESOURCE; + case RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL: + return D3D12_BARRIER_LAYOUT_COPY_SOURCE; + case RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL: + return D3D12_BARRIER_LAYOUT_COPY_DEST; + case RDD::TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL: + return D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE; + case RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL: + return D3D12_BARRIER_LAYOUT_RESOLVE_DEST; + case RDD::TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL: + return D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE; + default: + DEV_ASSERT(false && "Unknown texture layout."); + return 
D3D12_BARRIER_LAYOUT_UNDEFINED;
+	}
+}
+
+// Records the given memory, buffer and texture barriers into p_cmd_buffer as D3D12 enhanced barriers.
+// Does nothing when enhanced barriers are not supported or when no barrier is supplied.
+// p_src_stages and p_dst_stages apply to every barrier in the call; the access masks (and, for
+// textures, the layouts and subresource ranges) are taken from each individual barrier.
+void RenderingDeviceDriverD3D12::command_pipeline_barrier(CommandBufferID p_cmd_buffer,
+		BitField<PipelineStageBits> p_src_stages,
+		BitField<PipelineStageBits> p_dst_stages,
 		VectorView<RDD::MemoryBarrier> p_memory_barriers,
 		VectorView<RDD::BufferBarrier> p_buffer_barriers,
 		VectorView<RDD::TextureBarrier> p_texture_barriers) {
-	if (p_src_stages.has_flag(PIPELINE_STAGE_ALL_COMMANDS_BIT) && p_dst_stages.has_flag(PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
-		// Looks like the intent is a full barrier.
-		// In the resource barriers world, we can force a full barrier by discarding some resource, as per
-		// https://microsoft.github.io/DirectX-Specs/d3d/D3D12EnhancedBarriers.html#synchronous-copy-discard-and-resolve.
-		const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;
-		cmd_buf_info->cmd_list->DiscardResource(frames[frame_idx].aux_resource->GetResource(), nullptr);
+	if (!barrier_capabilities.enhanced_barriers_supported) {
+		// Enhanced barriers are a requirement for this function.
+		return;
+	}
+
+	if (p_memory_barriers.size() == 0 && p_buffer_barriers.size() == 0 && p_texture_barriers.size() == 0) {
+		// At least one barrier must be present in the arguments.
+		return;
 	}
+
+	// The command list must support the required interface.
+	// QueryInterface() adds a reference on success; holding the result in a ComPtr
+	// releases that reference when this function returns instead of leaking it.
+	const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)(p_cmd_buffer.id);
+	ComPtr<ID3D12GraphicsCommandList7> cmd_list_7;
+	HRESULT res = cmd_buf_info->cmd_list->QueryInterface(IID_PPV_ARGS(cmd_list_7.GetAddressOf()));
+	ERR_FAIL_COND(FAILED(res));
+
+	// Convert the RDD barriers to D3D12 enhanced barriers.
+	// The scratch vectors are thread-local and only cleared, so they are reused across calls.
+	thread_local LocalVector<D3D12_GLOBAL_BARRIER> global_barriers;
+	thread_local LocalVector<D3D12_BUFFER_BARRIER> buffer_barriers;
+	thread_local LocalVector<D3D12_TEXTURE_BARRIER> texture_barriers;
+	global_barriers.clear();
+	buffer_barriers.clear();
+	texture_barriers.clear();
+
+	D3D12_GLOBAL_BARRIER global_barrier = {};
+	for (uint32_t i = 0; i < p_memory_barriers.size(); i++) {
+		const MemoryBarrier &memory_barrier = p_memory_barriers[i];
+		_rd_stages_and_access_to_d3d12(p_src_stages, RDD::TEXTURE_LAYOUT_MAX, memory_barrier.src_access, global_barrier.SyncBefore, global_barrier.AccessBefore);
+		_rd_stages_and_access_to_d3d12(p_dst_stages, RDD::TEXTURE_LAYOUT_MAX, memory_barrier.dst_access, global_barrier.SyncAfter, global_barrier.AccessAfter);
+		global_barriers.push_back(global_barrier);
+	}
+
+	D3D12_BUFFER_BARRIER buffer_barrier_d3d12 = {};
+	buffer_barrier_d3d12.Offset = 0;
+	buffer_barrier_d3d12.Size = UINT64_MAX; // Per the enhanced barriers specification, Offset must be 0 and Size must be UINT64_MAX or the full buffer size.
+	for (uint32_t i = 0; i < p_buffer_barriers.size(); i++) {
+		const BufferBarrier &buffer_barrier_rd = p_buffer_barriers[i];
+		const BufferInfo *buffer_info = (const BufferInfo *)(buffer_barrier_rd.buffer.id);
+		_rd_stages_and_access_to_d3d12(p_src_stages, RDD::TEXTURE_LAYOUT_MAX, buffer_barrier_rd.src_access, buffer_barrier_d3d12.SyncBefore, buffer_barrier_d3d12.AccessBefore);
+		_rd_stages_and_access_to_d3d12(p_dst_stages, RDD::TEXTURE_LAYOUT_MAX, buffer_barrier_rd.dst_access, buffer_barrier_d3d12.SyncAfter, buffer_barrier_d3d12.AccessAfter);
+		buffer_barrier_d3d12.pResource = buffer_info->resource;
+		buffer_barriers.push_back(buffer_barrier_d3d12);
+	}
+
+	D3D12_TEXTURE_BARRIER texture_barrier_d3d12 = {};
+	for (uint32_t i = 0; i < p_texture_barriers.size(); i++) {
+		const TextureBarrier &texture_barrier_rd = p_texture_barriers[i];
+		const TextureInfo *texture_info = (const TextureInfo *)(texture_barrier_rd.texture.id);
+		_rd_stages_and_access_to_d3d12(p_src_stages, texture_barrier_rd.prev_layout, texture_barrier_rd.src_access, texture_barrier_d3d12.SyncBefore, texture_barrier_d3d12.AccessBefore);
+		_rd_stages_and_access_to_d3d12(p_dst_stages, texture_barrier_rd.next_layout, texture_barrier_rd.dst_access, texture_barrier_d3d12.SyncAfter, texture_barrier_d3d12.AccessAfter);
+		texture_barrier_d3d12.LayoutBefore = _rd_texture_layout_to_d3d12_barrier_layout(texture_barrier_rd.prev_layout);
+		texture_barrier_d3d12.LayoutAfter = _rd_texture_layout_to_d3d12_barrier_layout(texture_barrier_rd.next_layout);
+		texture_barrier_d3d12.pResource = texture_info->resource;
+		texture_barrier_d3d12.Subresources.IndexOrFirstMipLevel = texture_barrier_rd.subresources.base_mipmap;
+		texture_barrier_d3d12.Subresources.NumMipLevels = texture_barrier_rd.subresources.mipmap_count;
+		texture_barrier_d3d12.Subresources.FirstArraySlice = texture_barrier_rd.subresources.base_layer;
+		texture_barrier_d3d12.Subresources.NumArraySlices = texture_barrier_rd.subresources.layer_count;
+		texture_barrier_d3d12.Subresources.FirstPlane = _compute_plane_slice(texture_info->format, texture_barrier_rd.subresources.aspect);
+		texture_barrier_d3d12.Subresources.NumPlanes = format_get_plane_count(texture_info->format);
+		// Transitions out of an undefined layout are flagged as discards.
+		texture_barrier_d3d12.Flags = (texture_barrier_rd.prev_layout == RDD::TEXTURE_LAYOUT_UNDEFINED) ? D3D12_TEXTURE_BARRIER_FLAG_DISCARD : D3D12_TEXTURE_BARRIER_FLAG_NONE;
+		texture_barriers.push_back(texture_barrier_d3d12);
+	}
+
+	// Define the barrier groups and execute.
+	// Only groups that actually contain barriers are submitted; the early return above
+	// guarantees that at least one of them is non-empty.
+	D3D12_BARRIER_GROUP barrier_groups[3] = {};
+	uint32_t barrier_group_count = 0;
+	if (global_barriers.size() > 0) {
+		D3D12_BARRIER_GROUP &barrier_group = barrier_groups[barrier_group_count++];
+		barrier_group.Type = D3D12_BARRIER_TYPE_GLOBAL;
+		barrier_group.NumBarriers = global_barriers.size();
+		barrier_group.pGlobalBarriers = global_barriers.ptr();
+	}
+	if (buffer_barriers.size() > 0) {
+		D3D12_BARRIER_GROUP &barrier_group = barrier_groups[barrier_group_count++];
+		barrier_group.Type = D3D12_BARRIER_TYPE_BUFFER;
+		barrier_group.NumBarriers = buffer_barriers.size();
+		barrier_group.pBufferBarriers = buffer_barriers.ptr();
+	}
+	if (texture_barriers.size() > 0) {
+		D3D12_BARRIER_GROUP &barrier_group = barrier_groups[barrier_group_count++];
+		barrier_group.Type = D3D12_BARRIER_TYPE_TEXTURE;
+		barrier_group.NumBarriers = texture_barriers.size();
+		barrier_group.pTextureBarriers = texture_barriers.ptr();
+	}
+	cmd_list_7->Barrier(barrier_group_count, barrier_groups);
 }
 
 /****************/
@@ -3464,6 +3772,8 @@ RDD::ShaderID RenderingDeviceDriverD3D12::shader_create_from_bytecode(const Vect
 		zstd_size = STEPIFY(zstd_size, 4);
 		read_offset += zstd_size;
 		ERR_FAIL_COND_V(read_offset > binsize, ShaderID());
+
+		r_shader_desc.stages.push_back(ShaderStage(stage));
 	}
 
 	const uint8_t *root_sig_data_ptr = binptr + read_offset;
@@ -3795,6 +4105,10 @@ void RenderingDeviceDriverD3D12::uniform_set_free(UniformSetID p_uniform_set) {
 // ----- COMMANDS -----
 
 void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
+	if (barrier_capabilities.enhanced_barriers_supported) {
+		return;
+	}
+
 	// Perform pending blackouts.
{ SelfList *E = textures_pending_clear.first(); @@ -3802,7 +4116,7 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff TextureSubresourceRange subresources; subresources.layer_count = E->self()->layers; subresources.mipmap_count = E->self()->mipmaps; - command_clear_color_texture(p_cmd_buffer, TextureID(E->self()), TEXTURE_LAYOUT_GENERAL, Color(), subresources); + command_clear_color_texture(p_cmd_buffer, TextureID(E->self()), TEXTURE_LAYOUT_UNDEFINED, Color(), subresources); SelfList *next = E->next(); E->remove_from_list(); @@ -3935,34 +4249,6 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff for (uint32_t i = 0; i < tex_info->layers; i++) { for (uint32_t j = 0; j < tex_info->mipmaps; j++) { uint32_t subresource = D3D12CalcSubresource(tex_info->base_mip + j, tex_info->base_layer + i, 0, tex_info->desc.MipLevels, tex_info->desc.ArraySize()); - - if ((wanted_state & D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE)) { - // [[CROSS_FAMILY_FALLBACK]]. - if (tex_info->owner_info.resource && tex_info->main_texture && tex_info->main_texture != tex_info) { - uint32_t subres_qword = subresource / 64; - uint64_t subres_mask = (uint64_t(1) << (subresource % 64)); - if ((tex_info->main_texture->states_ptr->xfamily_fallback.subresources_dirty[subres_qword] & subres_mask)) { - // Prepare for copying the write-to texture to this one, if out-of-date. 
- _resource_transition_batch(tex_info->main_texture, subresource, planes, D3D12_RESOURCE_STATE_COPY_SOURCE); - _resource_transition_batch(tex_info, subresource, planes, D3D12_RESOURCE_STATE_COPY_DEST); - - CommandBufferInfo::FamilyFallbackCopy ffc; - ffc.texture = tex_info; - ffc.subresource = subresource; - ffc.mipmap = j; - ffc.dst_wanted_state = wanted_state; - - CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; - cmd_buf_info->family_fallback_copies.resize(cmd_buf_info->family_fallback_copies.size() + 1); - cmd_buf_info->family_fallback_copies[cmd_buf_info->family_fallback_copy_count] = ffc; - cmd_buf_info->family_fallback_copy_count++; - - tex_info->main_texture->states_ptr->xfamily_fallback.subresources_dirty[subres_qword] &= ~subres_mask; - } - continue; - } - } - _resource_transition_batch(tex_info, subresource, planes, wanted_state); } } @@ -3974,55 +4260,6 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff if (p_set_index == shader_info_in->sets.size() - 1) { CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); - - // [[CROSS_FAMILY_FALLBACK]]. 
- for (uint32_t i = 0; i < cmd_buf_info->family_fallback_copy_count; i++) { - const CommandBufferInfo::FamilyFallbackCopy &ffc = cmd_buf_info->family_fallback_copies[i]; - - D3D12_TEXTURE_COPY_LOCATION dst_tex = {}; - dst_tex.pResource = ffc.texture->resource; - dst_tex.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst_tex.SubresourceIndex = ffc.subresource; - - D3D12_TEXTURE_COPY_LOCATION src_tex = {}; - src_tex.pResource = ffc.texture->main_texture->resource; - src_tex.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src_tex.SubresourceIndex = ffc.subresource; - - const ResourceInfo::States::CrossFamillyFallback &xfamily = ffc.texture->main_texture->owner_info.states.xfamily_fallback; - if (xfamily.interim_buffer.Get()) { - // Must copy via a buffer due to reinterpret-copy known not to be available for these data types. - D3D12_TEXTURE_COPY_LOCATION buf_loc = {}; - buf_loc.pResource = xfamily.interim_buffer.Get(); - buf_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - buf_loc.PlacedFootprint.Offset = 0; - buf_loc.PlacedFootprint.Footprint.Format = ffc.texture->main_texture->desc.Format; - buf_loc.PlacedFootprint.Footprint.Width = MAX(1u, ffc.texture->main_texture->desc.Width >> ffc.mipmap); - buf_loc.PlacedFootprint.Footprint.Height = MAX(1u, ffc.texture->main_texture->desc.Height >> ffc.mipmap); - buf_loc.PlacedFootprint.Footprint.Depth = MAX(1u, (uint32_t)ffc.texture->main_texture->desc.Depth() >> ffc.mipmap); - buf_loc.PlacedFootprint.Footprint.RowPitch = STEPIFY(buf_loc.PlacedFootprint.Footprint.Width * sizeof(uint16_t), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - - D3D12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(xfamily.interim_buffer.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_COPY_DEST); - cmd_buf_info->cmd_list->ResourceBarrier(1, &barrier); - - cmd_buf_info->cmd_list->CopyTextureRegion(&buf_loc, 0, 0, 0, &src_tex, nullptr); - - barrier = CD3DX12_RESOURCE_BARRIER::Transition(xfamily.interim_buffer.Get(), 
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE); - cmd_buf_info->cmd_list->ResourceBarrier(1, &barrier); - - buf_loc.PlacedFootprint.Footprint.Format = ffc.texture->desc.Format; - cmd_buf_info->cmd_list->CopyTextureRegion(&dst_tex, 0, 0, 0, &buf_loc, nullptr); - } else { - // Direct copy is possible. - cmd_buf_info->cmd_list->CopyTextureRegion(&dst_tex, 0, 0, 0, &src_tex, nullptr); - } - - // Set the specific SRV state we wanted from the beginning to the alternative version of the texture. - _resource_transition_batch(ffc.texture, ffc.subresource, 1, ffc.dst_wanted_state); - } - cmd_buf_info->family_fallback_copy_count = 0; - - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); } } @@ -4299,8 +4536,10 @@ void RenderingDeviceDriverD3D12::command_clear_buffer(CommandBufferID p_cmd_buff } } - _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {}; uav_desc.Format = DXGI_FORMAT_R32_TYPELESS; @@ -4340,9 +4579,11 @@ void RenderingDeviceDriverD3D12::command_copy_buffer(CommandBufferID p_cmd_buffe BufferInfo *src_buf_info = (BufferInfo *)p_src_buffer.id; BufferInfo *buf_loc_info = (BufferInfo *)p_buf_locfer.id; - _resource_transition_batch(src_buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); - _resource_transition_batch(buf_loc_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(src_buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); + _resource_transition_batch(buf_loc_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST); + 
_resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } for (uint32_t i = 0; i < p_regions.size(); i++) { cmd_buf_info->cmd_list->CopyBufferRegion(buf_loc_info->resource, p_regions[i].dst_offset, src_buf_info->resource, p_regions[i].src_offset, p_regions[i].size); @@ -4354,43 +4595,37 @@ void RenderingDeviceDriverD3D12::command_copy_texture(CommandBufferID p_cmd_buff TextureInfo *src_tex_info = (TextureInfo *)p_src_texture.id; TextureInfo *dst_tex_info = (TextureInfo *)p_dst_texture.id; - for (uint32_t i = 0; i < p_regions.size(); i++) { - UINT src_subresource = D3D12CalcSubresource( - p_regions[i].src_subresources.mipmap, - p_regions[i].src_subresources.base_layer, - _compute_plane_slice(src_tex_info->format, p_regions[i].src_subresources.aspect), - src_tex_info->desc.MipLevels, - src_tex_info->desc.ArraySize()); - _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); - - UINT dst_subresource = D3D12CalcSubresource( - p_regions[i].dst_subresources.mipmap, - p_regions[i].dst_subresources.base_layer, - _compute_plane_slice(dst_tex_info->format, p_regions[i].dst_subresources.aspect), - dst_tex_info->desc.MipLevels, - dst_tex_info->desc.ArraySize()); - _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST); + if (!barrier_capabilities.enhanced_barriers_supported) { + // Batch all barrier transitions for the textures before performing the copies. 
+ for (uint32_t i = 0; i < p_regions.size(); i++) { + uint32_t layer_count = MIN(p_regions[i].src_subresources.layer_count, p_regions[i].dst_subresources.layer_count); + for (uint32_t j = 0; j < layer_count; j++) { + UINT src_subresource = _compute_subresource_from_layers(src_tex_info, p_regions[i].src_subresources, j); + UINT dst_subresource = _compute_subresource_from_layers(dst_tex_info, p_regions[i].dst_subresources, j); + _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); + _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST); + } + } _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } - CD3DX12_TEXTURE_COPY_LOCATION src_location(src_tex_info->resource, src_subresource); - CD3DX12_TEXTURE_COPY_LOCATION dst_location(dst_tex_info->resource, dst_subresource); - - CD3DX12_BOX src_box( - p_regions[i].src_offset.x, - p_regions[i].src_offset.y, - p_regions[i].src_offset.z, - p_regions[i].src_offset.x + p_regions[i].size.x, - p_regions[i].src_offset.y + p_regions[i].size.y, - p_regions[i].src_offset.z + p_regions[i].size.z); - - cmd_buf_info->cmd_list->CopyTextureRegion( - &dst_location, - p_regions[i].dst_offset.x, - p_regions[i].dst_offset.y, - p_regions[i].dst_offset.z, - &src_location, - &src_box); + CD3DX12_BOX src_box; + for (uint32_t i = 0; i < p_regions.size(); i++) { + uint32_t layer_count = MIN(p_regions[i].src_subresources.layer_count, p_regions[i].dst_subresources.layer_count); + for (uint32_t j = 0; j < layer_count; j++) { + UINT src_subresource = _compute_subresource_from_layers(src_tex_info, p_regions[i].src_subresources, j); + UINT dst_subresource = _compute_subresource_from_layers(dst_tex_info, p_regions[i].dst_subresources, j); + CD3DX12_TEXTURE_COPY_LOCATION src_location(src_tex_info->resource, src_subresource); + CD3DX12_TEXTURE_COPY_LOCATION dst_location(dst_tex_info->resource, dst_subresource); + src_box.left = p_regions[i].src_offset.x; + src_box.top 
= p_regions[i].src_offset.y; + src_box.front = p_regions[i].src_offset.z; + src_box.right = p_regions[i].src_offset.x + p_regions[i].size.x; + src_box.bottom = p_regions[i].src_offset.y + p_regions[i].size.y; + src_box.back = p_regions[i].src_offset.z + p_regions[i].size.z; + cmd_buf_info->cmd_list->CopyTextureRegion(&dst_location, p_regions[i].dst_offset.x, p_regions[i].dst_offset.y, p_regions[i].dst_offset.z, &src_location, &src_box); + } } } @@ -4400,12 +4635,12 @@ void RenderingDeviceDriverD3D12::command_resolve_texture(CommandBufferID p_cmd_b TextureInfo *dst_tex_info = (TextureInfo *)p_dst_texture.id; UINT src_subresource = D3D12CalcSubresource(p_src_mipmap, p_src_layer, 0, src_tex_info->desc.MipLevels, src_tex_info->desc.ArraySize()); - _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); - UINT dst_subresource = D3D12CalcSubresource(p_dst_mipmap, p_dst_layer, 0, dst_tex_info->desc.MipLevels, dst_tex_info->desc.ArraySize()); - _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST); - - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); + _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } cmd_buf_info->cmd_list->ResolveSubresource(dst_tex_info->resource, dst_subresource, src_tex_info->resource, src_subresource, RD_TO_D3D12_FORMAT[src_tex_info->format].general_format); } @@ -4446,7 +4681,9 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c } } - _transition_subresources(D3D12_RESOURCE_STATE_RENDER_TARGET); + if (!barrier_capabilities.enhanced_barriers_supported) { + _transition_subresources(D3D12_RESOURCE_STATE_RENDER_TARGET); + } for (uint32_t i = 0; i < 
p_subresources.mipmap_count; i++) { D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = _make_rtv_for_texture(tex_info, p_subresources.base_mipmap + i, p_subresources.base_layer, p_subresources.layer_count, false); @@ -4464,7 +4701,7 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c frames[frame_idx].desc_heap_walkers.rtv.advance(); } - } else { + } else if (tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) { // Clear via UAV. _command_check_descriptor_sets(p_cmd_buffer); @@ -4489,7 +4726,9 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c } } - _transition_subresources(D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + if (!barrier_capabilities.enhanced_barriers_supported) { + _transition_subresources(D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + } for (uint32_t i = 0; i < p_subresources.mipmap_count; i++) { D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = _make_ranged_uav_for_texture(tex_info, p_subresources.base_mipmap + i, p_subresources.base_layer, p_subresources.layer_count, false); @@ -4510,6 +4749,7 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c (UINT)p_color.get_b8(), (UINT)p_color.get_a8(), }; + cmd_buf_info->cmd_list->ClearUnorderedAccessViewUint( frames[frame_idx].desc_heap_walkers.resources.get_curr_gpu_handle(), frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle(), @@ -4521,6 +4761,8 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c frames[frame_idx].desc_heap_walkers.resources.advance(); frames[frame_idx].desc_heap_walkers.aux.advance(); } + } else { + ERR_FAIL_MSG("Cannot clear texture because its format does not support UAV writes. 
You'll need to update its contents through another method."); } } @@ -4528,8 +4770,7 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; BufferInfo *buf_info = (BufferInfo *)p_src_buffer.id; TextureInfo *tex_info = (TextureInfo *)p_dst_texture.id; - - if (buf_info->flags.is_for_upload) { + if (!barrier_capabilities.enhanced_barriers_supported) { _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); } @@ -4557,19 +4798,21 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID STEPIFY(p_regions[i].texture_region_size.y, block_h), p_regions[i].texture_region_size.z); - for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { - UINT dst_subresource = D3D12CalcSubresource( - p_regions[i].texture_subresources.mipmap, - p_regions[i].texture_subresources.base_layer + j, - _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect), - tex_info->desc.MipLevels, - tex_info->desc.ArraySize()); - CD3DX12_TEXTURE_COPY_LOCATION copy_dst(tex_info->resource, dst_subresource); + if (!barrier_capabilities.enhanced_barriers_supported) { + for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { + UINT dst_subresource = D3D12CalcSubresource( + p_regions[i].texture_subresources.mipmap, + p_regions[i].texture_subresources.base_layer + j, + _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect), + tex_info->desc.MipLevels, + tex_info->desc.ArraySize()); + CD3DX12_TEXTURE_COPY_LOCATION copy_dst(tex_info->resource, dst_subresource); - _resource_transition_batch(tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST); - } + _resource_transition_batch(tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST); + } - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + 
_resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { UINT dst_subresource = D3D12CalcSubresource( @@ -4596,24 +4839,28 @@ void RenderingDeviceDriverD3D12::command_copy_texture_to_buffer(CommandBufferID TextureInfo *tex_info = (TextureInfo *)p_src_texture.id; BufferInfo *buf_info = (BufferInfo *)p_buf_locfer.id; - _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST); + } uint32_t block_w = 0, block_h = 0; get_compressed_image_format_block_dimensions(tex_info->format, block_w, block_h); for (uint32_t i = 0; i < p_regions.size(); i++) { - for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { - UINT src_subresource = D3D12CalcSubresource( - p_regions[i].texture_subresources.mipmap, - p_regions[i].texture_subresources.base_layer + j, - _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect), - tex_info->desc.MipLevels, - tex_info->desc.ArraySize()); + if (!barrier_capabilities.enhanced_barriers_supported) { + for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { + UINT src_subresource = D3D12CalcSubresource( + p_regions[i].texture_subresources.mipmap, + p_regions[i].texture_subresources.base_layer + j, + _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect), + tex_info->desc.MipLevels, + tex_info->desc.ArraySize()); - _resource_transition_batch(tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); - } + _resource_transition_batch(tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); + } - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { UINT 
src_subresource = D3D12CalcSubresource( @@ -4763,22 +5010,25 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd } }; - for (uint32_t i = 0; i < fb_info->attachments.size(); i++) { - TextureInfo *tex_info = (TextureInfo *)fb_info->attachments[i].id; - if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) { - _transition_subresources(tex_info, D3D12_RESOURCE_STATE_RENDER_TARGET); - } else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) { - _transition_subresources(tex_info, D3D12_RESOURCE_STATE_DEPTH_WRITE); - } else { - DEV_ASSERT(false); + if (fb_info->is_screen || !barrier_capabilities.enhanced_barriers_supported) { + // Screen framebuffers must perform this transition even if enhanced barriers are supported. + for (uint32_t i = 0; i < fb_info->attachments.size(); i++) { + TextureInfo *tex_info = (TextureInfo *)fb_info->attachments[i].id; + if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) { + _transition_subresources(tex_info, D3D12_RESOURCE_STATE_RENDER_TARGET); + } else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) { + _transition_subresources(tex_info, D3D12_RESOURCE_STATE_DEPTH_WRITE); + } else { + DEV_ASSERT(false); + } + } + if (fb_info->vrs_attachment) { + TextureInfo *tex_info = (TextureInfo *)fb_info->vrs_attachment.id; + _transition_subresources(tex_info, D3D12_RESOURCE_STATE_SHADING_RATE_SOURCE); } - } - if (fb_info->vrs_attachment) { - TextureInfo *tex_info = (TextureInfo *)fb_info->vrs_attachment.id; - _transition_subresources(tex_info, D3D12_RESOURCE_STATE_SHADING_RATE_SOURCE); - } - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } cmd_buf_info->render_pass_state.region_rect = CD3DX12_RECT( p_rect.position.x, @@ -5140,8 +5390,11 @@ void RenderingDeviceDriverD3D12::command_render_draw_indexed_indirect(CommandBuf CommandBufferInfo *cmd_buf_info = (CommandBufferInfo 
*)p_cmd_buffer.id; _bind_vertex_buffers(cmd_buf_info); BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id; - _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } + cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw_indexed.Get(), p_draw_count, indirect_buf_info->resource, p_offset, nullptr, 0); } @@ -5150,9 +5403,12 @@ void RenderingDeviceDriverD3D12::command_render_draw_indexed_indirect_count(Comm _bind_vertex_buffers(cmd_buf_info); BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id; BufferInfo *count_buf_info = (BufferInfo *)p_count_buffer.id; - _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } + cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw_indexed.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset); } @@ -5160,8 +5416,11 @@ void RenderingDeviceDriverD3D12::command_render_draw_indirect(CommandBufferID p_ CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; _bind_vertex_buffers(cmd_buf_info); BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id; - 
_resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } + cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw.Get(), p_draw_count, indirect_buf_info->resource, p_offset, nullptr, 0); } @@ -5170,9 +5429,12 @@ void RenderingDeviceDriverD3D12::command_render_draw_indirect_count(CommandBuffe _bind_vertex_buffers(cmd_buf_info); BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id; BufferInfo *count_buf_info = (BufferInfo *)p_count_buffer.id; - _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } + cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset); } @@ -5191,10 +5453,15 @@ void RenderingDeviceDriverD3D12::command_render_bind_vertex_buffers(CommandBuffe cmd_buf_info->render_pass_state.vertex_buffer_views[i] = {}; cmd_buf_info->render_pass_state.vertex_buffer_views[i].BufferLocation = buffer_info->resource->GetGPUVirtualAddress() + p_offsets[i]; cmd_buf_info->render_pass_state.vertex_buffer_views[i].SizeInBytes = buffer_info->size - p_offsets[i]; + if (!barrier_capabilities.enhanced_barriers_supported) { + 
_resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER); + } + } - _resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); } - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + cmd_buf_info->render_pass_state.vertex_buffer_count = p_binding_count; } @@ -5207,8 +5474,10 @@ void RenderingDeviceDriverD3D12::command_render_bind_index_buffer(CommandBufferI d3d12_ib_view.SizeInBytes = buffer_info->size - p_offset; d3d12_ib_view.Format = p_format == INDEX_BUFFER_FORMAT_UINT16 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; - _resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_INDEX_BUFFER); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_INDEX_BUFFER); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } cmd_buf_info->cmd_list->IASetIndexBuffer(&d3d12_ib_view); } @@ -5604,15 +5873,21 @@ void RenderingDeviceDriverD3D12::command_bind_compute_uniform_set(CommandBufferI void RenderingDeviceDriverD3D12::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) { const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } + cmd_buf_info->cmd_list->Dispatch(p_x_groups, p_y_groups, p_z_groups); } void RenderingDeviceDriverD3D12::command_compute_dispatch_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset) { const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; BufferInfo 
*indirect_buf_info = (BufferInfo *)p_indirect_buffer.id; - _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } + cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.dispatch.Get(), 1, indirect_buf_info->resource, p_offset, nullptr, 0); } @@ -5927,11 +6202,7 @@ uint64_t RenderingDeviceDriverD3D12::limit_get(Limit p_limit) { uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) { switch (p_trait) { case API_TRAIT_HONORS_PIPELINE_BARRIERS: - // TODO: - // 1. Map fine/Vulkan/enhanced barriers to legacy barriers as closely as possible - // so there's still some advantage even without enhanced barriers available. - // 2. Implement enhanced barriers and return true where available. - return 0; + return barrier_capabilities.enhanced_barriers_supported; case API_TRAIT_SHADER_CHANGE_INVALIDATION: return (uint64_t)SHADER_CHANGE_INVALIDATION_ALL_OR_NONE_ACCORDING_TO_LAYOUT_HASH; case API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT: @@ -5940,6 +6211,8 @@ uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) { return D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; case API_TRAIT_SECONDARY_VIEWPORT_SCISSOR: return false; + case API_TRAIT_CLEARS_WITH_COPY_ENGINE: + return false; default: return RenderingDeviceDriver::api_trait_get(p_trait); } @@ -6082,6 +6355,8 @@ Error RenderingDeviceDriverD3D12::_initialize_device() { // These happen due to how D3D12MA manages buffers; seems benign. D3D12_MESSAGE_ID_HEAP_ADDRESS_RANGE_HAS_NO_RESOURCE, D3D12_MESSAGE_ID_HEAP_ADDRESS_RANGE_INTERSECTS_MULTIPLE_BUFFERS, + // Seemingly a false positive. 
+ D3D12_MESSAGE_ID_DATA_STATIC_WHILE_SET_AT_EXECUTE_DESCRIPTOR_INVALID_DATA_CHANGE, }; D3D12_INFO_QUEUE_FILTER filter = {}; @@ -6231,6 +6506,7 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12)); if (SUCCEEDED(res)) { format_capabilities.relaxed_casting_supported = options12.RelaxedFormatCastingSupported; + barrier_capabilities.enhanced_barriers_supported = options12.EnhancedBarriersSupported; } if (vrs_capabilities.draw_call_supported || vrs_capabilities.primitive_supported || vrs_capabilities.ss_image_supported) { @@ -6263,7 +6539,7 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { #if 0 print_verbose("- Relaxed casting supported"); #else - // Certain configurations (Windows 11 with an updated Nvida driver) crash when using relaxed casting. + // Certain configurations (Windows 11 with an updated NVIDIA driver) crash when using relaxed casting. // Therefore, we disable it temporarily until we can assure that it's reliable. // There are fallbacks in place that work in every case, if less efficient. 
format_capabilities.relaxed_casting_supported = false; @@ -6363,10 +6639,6 @@ Error RenderingDeviceDriverD3D12::_initialize_frames(uint32_t p_frame_count) { frames[i].desc_heap_walkers.samplers = frames[i].desc_heaps.samplers.make_walker(); frames[i].desc_heap_walkers.aux = frames[i].desc_heaps.aux.make_walker(); frames[i].desc_heap_walkers.rtv = frames[i].desc_heaps.rtv.make_walker(); - - ID3D12Resource *resource = nullptr; - HRESULT res = allocator->CreateResource(&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COMMON, nullptr, &frames[frame_idx].aux_resource, IID_PPV_ARGS(&resource)); - ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "D3D12MA::CreateResource failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); } return OK; diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index 8e1223bdaad7..e2cb5d08ba5b 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -122,6 +122,10 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { bool relaxed_casting_supported = false; }; + struct BarrierCapabilities { + bool enhanced_barriers_supported = false; + }; + RenderingContextDriverD3D12 *context_driver = nullptr; RenderingContextDriver::Device context_device; ComPtr adapter; @@ -136,6 +140,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { ShaderCapabilities shader_capabilities; StorageBufferCapabilities storage_buffer_capabilities; FormatCapabilities format_capabilities; + BarrierCapabilities barrier_capabilities; String pipeline_cache_id; class DescriptorsHeap { @@ -218,11 +223,6 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { // As many subresources as mipmaps * layers; planes (for depth-stencil) are tracked together. TightLocalVector subresource_states; // Used only if not a view. 
uint32_t last_batch_with_uav_barrier = 0; - struct CrossFamillyFallback { - TightLocalVector subresources_dirty; - ComPtr interim_buffer; - ComPtr interim_buffer_alloc; - } xfamily_fallback; // [[CROSS_FAMILY_FALLBACK]]. }; ID3D12Resource *resource = nullptr; // Non-null even if not owned. @@ -275,7 +275,6 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { uint64_t size = 0; struct { bool usable_as_uav : 1; - bool is_for_upload : 1; } flags = {}; }; @@ -317,10 +316,14 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { UINT _compute_component_mapping(const TextureView &p_view); UINT _compute_plane_slice(DataFormat p_format, BitField p_aspect_bits); UINT _compute_plane_slice(DataFormat p_format, TextureAspect p_aspect); + UINT _compute_subresource_from_layers(TextureInfo *p_texture, const TextureSubresourceLayers &p_layers, uint32_t p_layer_offset); struct CommandBufferInfo; void _discard_texture_subresources(const TextureInfo *p_tex_info, const CommandBufferInfo *p_cmd_buf_info); +protected: + virtual bool _unordered_access_supported_by_format(DataFormat p_format); + public: virtual TextureID texture_create(const TextureFormat &p_format, const TextureView &p_view) override final; virtual TextureID texture_create_from_extension(uint64_t p_native_texture, TextureType p_type, DataFormat p_format, uint32_t p_array_layers, bool p_depth_stencil) override final; @@ -332,6 +335,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { virtual uint8_t *texture_map(TextureID p_texture, const TextureSubresource &p_subresource) override final; virtual void texture_unmap(TextureID p_texture) override final; virtual BitField texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) override final; + virtual bool texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) override final; private: TextureID _texture_create_shared_from_slice(TextureID 
p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps); @@ -367,8 +371,8 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { virtual void command_pipeline_barrier( CommandBufferID p_cmd_buffer, - BitField p_src_stages, - BitField p_dst_stages, + BitField p_src_stages, + BitField p_dst_stages, VectorView p_memory_barriers, VectorView p_buffer_barriers, VectorView p_texture_barriers) override final; @@ -465,16 +469,6 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { RenderPassState render_pass_state; bool descriptor_heaps_set = false; - - // [[CROSS_FAMILY_FALLBACK]]. - struct FamilyFallbackCopy { - TextureInfo *texture = nullptr; - uint32_t subresource = 0; - uint32_t mipmap = 0; - D3D12_RESOURCE_STATES dst_wanted_state = {}; - }; - LocalVector family_fallback_copies; - uint32_t family_fallback_copy_count = 0; }; public: @@ -961,7 +955,6 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { bool rtv = false; } desc_heaps_exhausted_reported; CD3DX12_CPU_DESCRIPTOR_HANDLE null_rtv_handle = {}; // For [[MANUAL_SUBPASSES]]. 
- ComPtr aux_resource; uint32_t segment_serial = 0; #ifdef DEV_ENABLED diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index 896fc6ff91b0..6e5b969451b4 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -264,6 +264,63 @@ static const VkFormat RD_TO_VK_FORMAT[RDD::DATA_FORMAT_MAX] = { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, }; +static VkImageLayout RD_TO_VK_LAYOUT[RDD::TEXTURE_LAYOUT_MAX] = { + VK_IMAGE_LAYOUT_UNDEFINED, // TEXTURE_LAYOUT_UNDEFINED + VK_IMAGE_LAYOUT_GENERAL, // TEXTURE_LAYOUT_STORAGE_OPTIMAL + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, // TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, // TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // TEXTURE_LAYOUT_COPY_SRC_OPTIMAL + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // TEXTURE_LAYOUT_COPY_DST_OPTIMAL + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL + VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR, // TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL +}; + +static VkPipelineStageFlags _rd_to_vk_pipeline_stages(BitField p_stages) { + VkPipelineStageFlags vk_flags = 0; + if (p_stages.has_flag(RDD::PIPELINE_STAGE_COPY_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_RESOLVE_BIT)) { + // Transfer has been split into copy and resolve bits. Clear them and merge them into one bit. 
+ vk_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + p_stages.clear_flag(RDD::PIPELINE_STAGE_COPY_BIT); + p_stages.clear_flag(RDD::PIPELINE_STAGE_RESOLVE_BIT); + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT)) { + // Vulkan should never use this as API_TRAIT_CLEAR_RESOURCES_WITH_VIEWS is not specified. + // Therefore, storage is never cleared with an explicit command. + p_stages.clear_flag(RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT); + } + + // The rest of the flags have compatible numeric values with Vulkan. + return VkPipelineStageFlags(p_stages) | vk_flags; +} + +static VkAccessFlags _rd_to_vk_access_flags(BitField p_access) { + VkAccessFlags vk_flags = 0; + if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_READ_BIT) || p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_READ_BIT)) { + vk_flags |= VK_ACCESS_TRANSFER_READ_BIT; + p_access.clear_flag(RDD::BARRIER_ACCESS_COPY_READ_BIT); + p_access.clear_flag(RDD::BARRIER_ACCESS_RESOLVE_READ_BIT); + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_WRITE_BIT) || p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT)) { + vk_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; + p_access.clear_flag(RDD::BARRIER_ACCESS_COPY_WRITE_BIT); + p_access.clear_flag(RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT); + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_STORAGE_CLEAR_BIT)) { + // Vulkan should never use this as API_TRAIT_CLEAR_RESOURCES_WITH_VIEWS is not specified. + // Therefore, storage is never cleared with an explicit command. + p_access.clear_flag(RDD::BARRIER_ACCESS_STORAGE_CLEAR_BIT); + } + + // The rest of the flags have compatible numeric values with Vulkan. + return VkAccessFlags(p_access) | vk_flags; +} + // RDD::CompareOperator == VkCompareOp. 
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NEVER, VK_COMPARE_OP_NEVER)); static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_LESS, VK_COMPARE_OP_LESS)); @@ -1334,18 +1391,6 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_SWIZZLE_G, VK_COMPONENT_SWIZZLE_G) static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_SWIZZLE_B, VK_COMPONENT_SWIZZLE_B)); static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_SWIZZLE_A, VK_COMPONENT_SWIZZLE_A)); -// RDD::TextureLayout == VkImageLayout. -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_UNDEFINED)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_PREINITIALIZED, VK_IMAGE_LAYOUT_PREINITIALIZED)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR)); - // RDD::TextureAspectBits == VkImageAspectFlagBits. 
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_ASPECT_DEPTH_BIT, VK_IMAGE_ASPECT_DEPTH_BIT)); @@ -1774,6 +1819,11 @@ BitField RenderingDeviceDriverVulkan::texture_get_usages_ return supported; } +bool RenderingDeviceDriverVulkan::texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) { + r_raw_reinterpretation = false; + return true; +} + /*****************/ /**** SAMPLER ****/ /*****************/ @@ -1893,7 +1943,6 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, V static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)); @@ -1910,8 +1959,6 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT, static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)); 
-static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_TRANSFER_READ_BIT)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_HOST_READ_BIT, VK_ACCESS_HOST_READ_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_HOST_WRITE_BIT, VK_ACCESS_HOST_WRITE_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_MEMORY_READ_BIT, VK_ACCESS_MEMORY_READ_BIT)); @@ -1929,8 +1976,8 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( for (uint32_t i = 0; i < p_memory_barriers.size(); i++) { vk_memory_barriers[i] = {}; vk_memory_barriers[i].sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; - vk_memory_barriers[i].srcAccessMask = (VkPipelineStageFlags)p_memory_barriers[i].src_access; - vk_memory_barriers[i].dstAccessMask = (VkAccessFlags)p_memory_barriers[i].dst_access; + vk_memory_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_memory_barriers[i].src_access); + vk_memory_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_memory_barriers[i].dst_access); } VkBufferMemoryBarrier *vk_buffer_barriers = ALLOCA_ARRAY(VkBufferMemoryBarrier, p_buffer_barriers.size()); @@ -1939,8 +1986,8 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( vk_buffer_barriers[i].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; vk_buffer_barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_buffer_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - vk_buffer_barriers[i].srcAccessMask = (VkAccessFlags)p_buffer_barriers[i].src_access; - vk_buffer_barriers[i].dstAccessMask = (VkAccessFlags)p_buffer_barriers[i].dst_access; + vk_buffer_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].src_access); + vk_buffer_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].dst_access); vk_buffer_barriers[i].buffer = ((const BufferInfo *)p_buffer_barriers[i].buffer.id)->vk_buffer; 
vk_buffer_barriers[i].offset = p_buffer_barriers[i].offset; vk_buffer_barriers[i].size = p_buffer_barriers[i].size; @@ -1951,10 +1998,10 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( const TextureInfo *tex_info = (const TextureInfo *)p_texture_barriers[i].texture.id; vk_image_barriers[i] = {}; vk_image_barriers[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - vk_image_barriers[i].srcAccessMask = (VkAccessFlags)p_texture_barriers[i].src_access; - vk_image_barriers[i].dstAccessMask = (VkAccessFlags)p_texture_barriers[i].dst_access; - vk_image_barriers[i].oldLayout = (VkImageLayout)p_texture_barriers[i].prev_layout; - vk_image_barriers[i].newLayout = (VkImageLayout)p_texture_barriers[i].next_layout; + vk_image_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_texture_barriers[i].src_access); + vk_image_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_texture_barriers[i].dst_access); + vk_image_barriers[i].oldLayout = RD_TO_VK_LAYOUT[p_texture_barriers[i].prev_layout]; + vk_image_barriers[i].newLayout = RD_TO_VK_LAYOUT[p_texture_barriers[i].next_layout]; vk_image_barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_image_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_image_barriers[i].image = tex_info->vk_view_create_info.image; @@ -1984,8 +2031,8 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( vkCmdPipelineBarrier( (VkCommandBuffer)p_cmd_buffer.id, - (VkPipelineStageFlags)p_src_stages, - (VkPipelineStageFlags)p_dst_stages, + _rd_to_vk_pipeline_stages(p_src_stages), + _rd_to_vk_pipeline_stages(p_dst_stages), 0, p_memory_barriers.size(), vk_memory_barriers, p_buffer_barriers.size(), vk_buffer_barriers, @@ -3726,7 +3773,7 @@ void RenderingDeviceDriverVulkan::command_copy_texture(CommandBufferID p_cmd_buf const TextureInfo *src_tex_info = (const TextureInfo *)p_src_texture.id; const TextureInfo *dst_tex_info = (const TextureInfo *)p_dst_texture.id; - vkCmdCopyImage((VkCommandBuffer)p_cmd_buffer.id, 
src_tex_info->vk_view_create_info.image, (VkImageLayout)p_src_texture_layout, dst_tex_info->vk_view_create_info.image, (VkImageLayout)p_dst_texture_layout, p_regions.size(), vk_copy_regions); + vkCmdCopyImage((VkCommandBuffer)p_cmd_buffer.id, src_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], dst_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], p_regions.size(), vk_copy_regions); } void RenderingDeviceDriverVulkan::command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) { @@ -3746,7 +3793,7 @@ void RenderingDeviceDriverVulkan::command_resolve_texture(CommandBufferID p_cmd_ vk_resolve.extent.height = MAX(1u, src_tex_info->vk_create_info.extent.height >> p_src_mipmap); vk_resolve.extent.depth = MAX(1u, src_tex_info->vk_create_info.extent.depth >> p_src_mipmap); - vkCmdResolveImage((VkCommandBuffer)p_cmd_buffer.id, src_tex_info->vk_view_create_info.image, (VkImageLayout)p_src_texture_layout, dst_tex_info->vk_view_create_info.image, (VkImageLayout)p_dst_texture_layout, 1, &vk_resolve); + vkCmdResolveImage((VkCommandBuffer)p_cmd_buffer.id, src_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], dst_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], 1, &vk_resolve); } void RenderingDeviceDriverVulkan::command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) { @@ -3757,7 +3804,7 @@ void RenderingDeviceDriverVulkan::command_clear_color_texture(CommandBufferID p_ _texture_subresource_range_to_vk(p_subresources, &vk_subresources); const TextureInfo *tex_info = (const TextureInfo *)p_texture.id; - 
vkCmdClearColorImage((VkCommandBuffer)p_cmd_buffer.id, tex_info->vk_view_create_info.image, (VkImageLayout)p_texture_layout, &vk_color, 1, &vk_subresources); + vkCmdClearColorImage((VkCommandBuffer)p_cmd_buffer.id, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_texture_layout], &vk_color, 1, &vk_subresources); } void RenderingDeviceDriverVulkan::command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView p_regions) { @@ -3768,7 +3815,7 @@ void RenderingDeviceDriverVulkan::command_copy_buffer_to_texture(CommandBufferID const BufferInfo *buf_info = (const BufferInfo *)p_src_buffer.id; const TextureInfo *tex_info = (const TextureInfo *)p_dst_texture.id; - vkCmdCopyBufferToImage((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, tex_info->vk_view_create_info.image, (VkImageLayout)p_dst_texture_layout, p_regions.size(), vk_copy_regions); + vkCmdCopyBufferToImage((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], p_regions.size(), vk_copy_regions); } void RenderingDeviceDriverVulkan::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView p_regions) { @@ -3779,7 +3826,7 @@ void RenderingDeviceDriverVulkan::command_copy_texture_to_buffer(CommandBufferID const TextureInfo *tex_info = (const TextureInfo *)p_src_texture.id; const BufferInfo *buf_info = (const BufferInfo *)p_dst_buffer.id; - vkCmdCopyImageToBuffer((VkCommandBuffer)p_cmd_buffer.id, tex_info->vk_view_create_info.image, (VkImageLayout)p_src_texture_layout, buf_info->vk_buffer, p_regions.size(), vk_copy_regions); + vkCmdCopyImageToBuffer((VkCommandBuffer)p_cmd_buffer.id, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], buf_info->vk_buffer, p_regions.size(), vk_copy_regions); } /******************/ @@ 
-3925,7 +3972,7 @@ static void _attachment_reference_to_vk(const RDD::AttachmentReference &p_attach *r_vk_attachment_reference = {}; r_vk_attachment_reference->sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR; r_vk_attachment_reference->attachment = p_attachment_reference.attachment; - r_vk_attachment_reference->layout = (VkImageLayout)p_attachment_reference.layout; + r_vk_attachment_reference->layout = RD_TO_VK_LAYOUT[p_attachment_reference.layout]; r_vk_attachment_reference->aspectMask = (VkImageAspectFlags)p_attachment_reference.aspect; } @@ -3944,8 +3991,8 @@ RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorView texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) override final; + virtual bool texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) override final; /*****************/ /**** SAMPLER ****/ diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 15e1731823f3..474fdb387f35 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -824,8 +824,8 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture } if (texture.draw_tracker != nullptr) { - // Draw tracker can assume the texture will be in transfer destination. - texture.draw_tracker->usage = RDG::RESOURCE_USAGE_TRANSFER_TO; + // Draw tracker can assume the texture will be in copy destination. + texture.draw_tracker->usage = RDG::RESOURCE_USAGE_COPY_TO; } } @@ -847,8 +847,11 @@ RID RenderingDevice::texture_create_shared(const TextureView &p_view, RID p_with // Create view. 
Texture texture = *src_texture; + texture.shared_fallback = nullptr; RDD::TextureView tv; + bool create_shared = true; + bool raw_reintepretation = false; if (p_view.format_override == DATA_FORMAT_MAX || p_view.format_override == texture.format) { tv.format = texture.format; } else { @@ -857,13 +860,47 @@ RID RenderingDevice::texture_create_shared(const TextureView &p_view, RID p_with ERR_FAIL_COND_V_MSG(!texture.allowed_shared_formats.has(p_view.format_override), RID(), "Format override is not in the list of allowed shareable formats for original texture."); tv.format = p_view.format_override; + create_shared = driver->texture_can_make_shared_with_format(texture.driver_id, p_view.format_override, raw_reintepretation); } tv.swizzle_r = p_view.swizzle_r; tv.swizzle_g = p_view.swizzle_g; tv.swizzle_b = p_view.swizzle_b; tv.swizzle_a = p_view.swizzle_a; - texture.driver_id = driver->texture_create_shared(texture.driver_id, tv); + if (create_shared) { + texture.driver_id = driver->texture_create_shared(texture.driver_id, tv); + } else { + // The regular view will use the same format as the main texture. + RDD::TextureView regular_view = tv; + regular_view.format = src_texture->format; + texture.driver_id = driver->texture_create_shared(texture.driver_id, regular_view); + + // Create the independent texture for the alias. 
+ RDD::TextureFormat alias_format = texture.texture_format(); + alias_format.format = tv.format; + alias_format.usage_bits = TEXTURE_USAGE_SAMPLING_BIT | TEXTURE_USAGE_CAN_COPY_TO_BIT; + + _texture_check_shared_fallback(src_texture); + _texture_check_shared_fallback(&texture); + + texture.shared_fallback->texture = driver->texture_create(alias_format, tv); + texture.shared_fallback->raw_reinterpretation = raw_reintepretation; + texture_memory += driver->texture_get_allocation_size(texture.shared_fallback->texture); + + RDG::ResourceTracker *tracker = RDG::resource_tracker_create(); + tracker->texture_driver_id = texture.shared_fallback->texture; + tracker->texture_subresources = texture.barrier_range(); + tracker->texture_usage = alias_format.usage_bits; + tracker->reference_count = 1; + texture.shared_fallback->texture_tracker = tracker; + texture.shared_fallback->revision = 0; + + if (raw_reintepretation && src_texture->shared_fallback->buffer.id == 0) { + // For shared textures of the same size, we create the buffer on the main texture if it doesn't have it already. 
+ _texture_create_reinterpret_buffer(src_texture); + } + } + ERR_FAIL_COND_V(!texture.driver_id, RID()); texture.slice_trackers.clear(); @@ -965,6 +1002,7 @@ RID RenderingDevice::texture_create_shared_from_slice(const TextureView &p_view, } Texture texture = *src_texture; + texture.shared_fallback = nullptr; get_image_format_required_size(texture.format, texture.width, texture.height, texture.depth, p_mipmap + 1, &texture.width, &texture.height); texture.mipmaps = p_mipmaps; @@ -979,6 +1017,8 @@ RID RenderingDevice::texture_create_shared_from_slice(const TextureView &p_view, } RDD::TextureView tv; + bool create_shared = true; + bool raw_reintepretation = false; if (p_view.format_override == DATA_FORMAT_MAX || p_view.format_override == texture.format) { tv.format = texture.format; } else { @@ -987,7 +1027,9 @@ RID RenderingDevice::texture_create_shared_from_slice(const TextureView &p_view, ERR_FAIL_COND_V_MSG(!texture.allowed_shared_formats.has(p_view.format_override), RID(), "Format override is not in the list of allowed shareable formats for original texture."); tv.format = p_view.format_override; + create_shared = driver->texture_can_make_shared_with_format(texture.driver_id, p_view.format_override, raw_reintepretation); } + tv.swizzle_r = p_view.swizzle_r; tv.swizzle_g = p_view.swizzle_g; tv.swizzle_b = p_view.swizzle_b; @@ -1000,7 +1042,47 @@ RID RenderingDevice::texture_create_shared_from_slice(const TextureView &p_view, "Specified layer must be a multiple of 6."); } - texture.driver_id = driver->texture_create_shared_from_slice(src_texture->driver_id, tv, p_slice_type, p_layer, slice_layers, p_mipmap, p_mipmaps); + if (create_shared) { + texture.driver_id = driver->texture_create_shared_from_slice(src_texture->driver_id, tv, p_slice_type, p_layer, slice_layers, p_mipmap, p_mipmaps); + } else { + // The regular view will use the same format as the main texture. 
+ RDD::TextureView regular_view = tv; + regular_view.format = src_texture->format; + texture.driver_id = driver->texture_create_shared_from_slice(src_texture->driver_id, regular_view, p_slice_type, p_layer, slice_layers, p_mipmap, p_mipmaps); + + // Create the independent texture for the slice. + RDD::TextureSubresourceRange slice_range = texture.barrier_range(); + slice_range.base_mipmap = 0; + slice_range.base_layer = 0; + + RDD::TextureFormat slice_format = texture.texture_format(); + slice_format.width = MAX(texture.width >> p_mipmap, 1U); + slice_format.height = MAX(texture.height >> p_mipmap, 1U); + slice_format.depth = MAX(texture.depth >> p_mipmap, 1U); + slice_format.format = tv.format; + slice_format.usage_bits = TEXTURE_USAGE_SAMPLING_BIT | TEXTURE_USAGE_CAN_COPY_TO_BIT; + + _texture_check_shared_fallback(src_texture); + _texture_check_shared_fallback(&texture); + + texture.shared_fallback->texture = driver->texture_create(slice_format, tv); + texture.shared_fallback->raw_reinterpretation = raw_reintepretation; + texture_memory += driver->texture_get_allocation_size(texture.shared_fallback->texture); + + RDG::ResourceTracker *tracker = RDG::resource_tracker_create(); + tracker->texture_driver_id = texture.shared_fallback->texture; + tracker->texture_subresources = slice_range; + tracker->texture_usage = slice_format.usage_bits; + tracker->reference_count = 1; + texture.shared_fallback->texture_tracker = tracker; + texture.shared_fallback->revision = 0; + + if (raw_reintepretation && src_texture->shared_fallback->buffer.id == 0) { + // For shared texture slices, we create the buffer on the slice if the source texture has no reinterpretation buffer. 
+ _texture_create_reinterpret_buffer(&texture); + } + } + ERR_FAIL_COND_V(!texture.driver_id, RID()); const Rect2i slice_rect(p_mipmap, p_layer, p_mipmaps, slice_layers); @@ -1093,15 +1175,18 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve // When using the setup queue directly, we transition the texture to the optimal layout. RDD::TextureBarrier tb; tb.texture = texture->driver_id; - tb.dst_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; + tb.dst_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT; tb.prev_layout = RDD::TEXTURE_LAYOUT_UNDEFINED; - tb.next_layout = RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL; + tb.next_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL; tb.subresources.aspect = texture->barrier_aspect_flags; tb.subresources.mipmap_count = texture->mipmaps; tb.subresources.base_layer = p_layer; tb.subresources.layer_count = 1; - driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, RDD::PIPELINE_STAGE_TRANSFER_BIT, {}, {}, tb); + driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, RDD::PIPELINE_STAGE_COPY_BIT, {}, {}, tb); + } else if (!p_use_setup_queue) { + // Indicate the texture will get modified for the shared texture fallback. 
+ _texture_update_shared_fallback(p_texture, texture, true); } uint32_t mipmap_offset = 0; @@ -1199,7 +1284,7 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve copy_region.texture_region_size = Vector3i(region_logic_w, region_logic_h, 1); if (p_use_setup_queue) { - driver->command_copy_buffer_to_texture(frames[frame].setup_command_buffer, staging_buffer_blocks[staging_buffer_current].driver_id, texture->driver_id, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, copy_region); + driver->command_copy_buffer_to_texture(frames[frame].setup_command_buffer, staging_buffer_blocks[staging_buffer_current].driver_id, texture->driver_id, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, copy_region); } else { RDG::RecordedBufferToTextureCopy buffer_to_texture_copy; buffer_to_texture_copy.from_buffer = staging_buffer_blocks[staging_buffer_current].driver_id; @@ -1221,14 +1306,14 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve // If the texture does not have a tracker, it means it must be transitioned to the sampling state. 
RDD::TextureBarrier tb; tb.texture = texture->driver_id; - tb.src_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - tb.prev_layout = RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL; + tb.src_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT; + tb.prev_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL; tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; tb.subresources.aspect = texture->barrier_aspect_flags; tb.subresources.mipmap_count = texture->mipmaps; tb.subresources.base_layer = p_layer; tb.subresources.layer_count = 1; - driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, {}, {}, tb); + driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, {}, {}, tb); } else if (!p_use_setup_queue && !command_buffer_to_texture_copies_vector.is_empty()) { if (_texture_make_mutable(texture, p_texture)) { // The texture must be mutable to be used as a copy destination. @@ -1241,6 +1326,186 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve return OK; } +void RenderingDevice::_texture_check_shared_fallback(Texture *p_texture) { + if (p_texture->shared_fallback == nullptr) { + p_texture->shared_fallback = memnew(Texture::SharedFallback); + } +} + +void RenderingDevice::_texture_update_shared_fallback(RID p_texture_rid, Texture *p_texture, bool p_for_writing) { + if (p_texture->shared_fallback == nullptr) { + // This texture does not use any of the shared texture fallbacks. + return; + } + + if (p_texture->owner.is_valid()) { + Texture *owner_texture = texture_owner.get_or_null(p_texture->owner); + ERR_FAIL_NULL(owner_texture); + if (p_for_writing) { + // Only the main texture is used for writing when using the shared fallback. 
+ owner_texture->shared_fallback->revision++; + } else if (p_texture->shared_fallback->revision != owner_texture->shared_fallback->revision) { + // Copy the contents of the main texture into the shared texture fallback slice. Update the revision. + _texture_copy_shared(p_texture->owner, owner_texture, p_texture_rid, p_texture); + p_texture->shared_fallback->revision = owner_texture->shared_fallback->revision; + } + } else if (p_for_writing) { + // Increment the revision of the texture so shared texture fallback slices must be updated. + p_texture->shared_fallback->revision++; + } +} + +void RenderingDevice::_texture_free_shared_fallback(Texture *p_texture) { + if (p_texture->shared_fallback != nullptr) { + if (p_texture->shared_fallback->texture_tracker != nullptr) { + RDG::resource_tracker_free(p_texture->shared_fallback->texture_tracker); + } + + if (p_texture->shared_fallback->buffer_tracker != nullptr) { + RDG::resource_tracker_free(p_texture->shared_fallback->buffer_tracker); + } + + if (p_texture->shared_fallback->texture.id != 0) { + texture_memory -= driver->texture_get_allocation_size(p_texture->shared_fallback->texture); + driver->texture_free(p_texture->shared_fallback->texture); + } + + if (p_texture->shared_fallback->buffer.id != 0) { + buffer_memory -= driver->buffer_get_allocation_size(p_texture->shared_fallback->buffer); + driver->buffer_free(p_texture->shared_fallback->buffer); + } + + memdelete(p_texture->shared_fallback); + p_texture->shared_fallback = nullptr; + } +} + +void RenderingDevice::_texture_copy_shared(RID p_src_texture_rid, Texture *p_src_texture, RID p_dst_texture_rid, Texture *p_dst_texture) { + // The only type of copying allowed is from the main texture to the slice texture, as slice textures are not allowed to be used for writing when using this fallback. 
+ DEV_ASSERT(p_src_texture != nullptr); + DEV_ASSERT(p_dst_texture != nullptr); + DEV_ASSERT(p_src_texture->owner.is_null()); + DEV_ASSERT(p_dst_texture->owner == p_src_texture_rid); + + bool src_made_mutable = _texture_make_mutable(p_src_texture, p_src_texture_rid); + bool dst_made_mutable = _texture_make_mutable(p_dst_texture, p_dst_texture_rid); + if (src_made_mutable || dst_made_mutable) { + draw_graph.add_synchronization(); + } + + if (p_dst_texture->shared_fallback->raw_reinterpretation) { + // If one of the textures is a main texture and they have a reinterpret buffer, we prefer using that as it's guaranteed to be big enough to hold + // anything and it's how the shared textures that don't use slices are created. + bool src_has_buffer = p_src_texture->shared_fallback->buffer.id != 0; + bool dst_has_buffer = p_dst_texture->shared_fallback->buffer.id != 0; + bool from_src = p_src_texture->owner.is_null() && src_has_buffer; + bool from_dst = p_dst_texture->owner.is_null() && dst_has_buffer; + if (!from_src && !from_dst) { + // If neither texture passed the condition, we just pick whichever texture has a reinterpretation buffer. + from_src = src_has_buffer; + from_dst = dst_has_buffer; + } + + // Pick the buffer and tracker to use from the right texture. + RDD::BufferID shared_buffer; + RDG::ResourceTracker *shared_buffer_tracker = nullptr; + if (from_src) { + shared_buffer = p_src_texture->shared_fallback->buffer; + shared_buffer_tracker = p_src_texture->shared_fallback->buffer_tracker; + } else if (from_dst) { + shared_buffer = p_dst_texture->shared_fallback->buffer; + shared_buffer_tracker = p_dst_texture->shared_fallback->buffer_tracker; + } else { + DEV_ASSERT(false && "This path should not be reachable."); + } + + // FIXME: When using reinterpretation buffers, the only texture aspect supported is color. Depth or stencil contents won't get copied. 
+ RDD::BufferTextureCopyRegion get_data_region; + RDG::RecordedBufferToTextureCopy update_copy; + RDD::TextureCopyableLayout first_copyable_layout; + RDD::TextureCopyableLayout copyable_layout; + RDD::TextureSubresource texture_subresource; + texture_subresource.aspect = RDD::TEXTURE_ASPECT_COLOR; + texture_subresource.layer = 0; + texture_subresource.mipmap = 0; + driver->texture_get_copyable_layout(p_dst_texture->shared_fallback->texture, texture_subresource, &first_copyable_layout); + + // Copying each mipmap from main texture to a buffer and then to the slice texture. + thread_local LocalVector get_data_vector; + thread_local LocalVector update_vector; + get_data_vector.clear(); + update_vector.clear(); + for (uint32_t i = 0; i < p_dst_texture->mipmaps; i++) { + driver->texture_get_copyable_layout(p_dst_texture->shared_fallback->texture, texture_subresource, &copyable_layout); + + uint32_t mipmap = p_dst_texture->base_mipmap + i; + get_data_region.buffer_offset = copyable_layout.offset - first_copyable_layout.offset; + get_data_region.texture_subresources.aspect = RDD::TEXTURE_ASPECT_COLOR_BIT; + get_data_region.texture_subresources.base_layer = p_dst_texture->base_layer; + get_data_region.texture_subresources.mipmap = mipmap; + get_data_region.texture_subresources.layer_count = p_dst_texture->layers; + get_data_region.texture_region_size.x = MAX(1U, p_src_texture->width >> mipmap); + get_data_region.texture_region_size.y = MAX(1U, p_src_texture->height >> mipmap); + get_data_region.texture_region_size.z = MAX(1U, p_src_texture->depth >> mipmap); + get_data_vector.push_back(get_data_region); + + update_copy.from_buffer = shared_buffer; + update_copy.region.buffer_offset = get_data_region.buffer_offset; + update_copy.region.texture_subresources.aspect = RDD::TEXTURE_ASPECT_COLOR_BIT; + update_copy.region.texture_subresources.base_layer = texture_subresource.layer; + update_copy.region.texture_subresources.mipmap = texture_subresource.mipmap; +
update_copy.region.texture_subresources.layer_count = get_data_region.texture_subresources.layer_count; + update_copy.region.texture_region_size.x = get_data_region.texture_region_size.x; + update_copy.region.texture_region_size.y = get_data_region.texture_region_size.y; + update_copy.region.texture_region_size.z = get_data_region.texture_region_size.z; + update_vector.push_back(update_copy); + + texture_subresource.mipmap++; + } + + draw_graph.add_texture_get_data(p_src_texture->driver_id, p_src_texture->draw_tracker, shared_buffer, get_data_vector, shared_buffer_tracker); + draw_graph.add_texture_update(p_dst_texture->shared_fallback->texture, p_dst_texture->shared_fallback->texture_tracker, update_vector, shared_buffer_tracker); + } else { + // Raw reinterpretation is not required. Use a regular texture copy. + RDD::TextureCopyRegion copy_region; + copy_region.src_subresources.aspect = p_src_texture->read_aspect_flags; + copy_region.src_subresources.base_layer = p_dst_texture->base_layer; + copy_region.src_subresources.layer_count = p_dst_texture->layers; + copy_region.dst_subresources.aspect = p_dst_texture->read_aspect_flags; + copy_region.dst_subresources.base_layer = 0; + copy_region.dst_subresources.layer_count = copy_region.src_subresources.layer_count; + + // Copying each mipmap from main texture to the slice texture.
+ thread_local LocalVector region_vector; + region_vector.clear(); + for (uint32_t i = 0; i < p_dst_texture->mipmaps; i++) { + uint32_t mipmap = p_dst_texture->base_mipmap + i; + copy_region.src_subresources.mipmap = mipmap; + copy_region.dst_subresources.mipmap = i; + copy_region.size.x = MAX(1U, p_src_texture->width >> mipmap); + copy_region.size.y = MAX(1U, p_src_texture->height >> mipmap); + copy_region.size.z = MAX(1U, p_src_texture->depth >> mipmap); + region_vector.push_back(copy_region); + } + + draw_graph.add_texture_copy(p_src_texture->driver_id, p_src_texture->draw_tracker, p_dst_texture->shared_fallback->texture, p_dst_texture->shared_fallback->texture_tracker, region_vector); + } +} + +void RenderingDevice::_texture_create_reinterpret_buffer(Texture *p_texture) { + uint64_t row_pitch_step = driver->api_trait_get(RDD::API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP); + uint64_t transfer_alignment = driver->api_trait_get(RDD::API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT); + uint32_t pixel_bytes = get_image_format_pixel_size(p_texture->format); + uint32_t row_pitch = STEPIFY(p_texture->width * pixel_bytes, row_pitch_step); + uint64_t buffer_size = STEPIFY(pixel_bytes * row_pitch * p_texture->height * p_texture->depth, transfer_alignment); + p_texture->shared_fallback->buffer = driver->buffer_create(buffer_size, RDD::BUFFER_USAGE_TRANSFER_FROM_BIT | RDD::BUFFER_USAGE_TRANSFER_TO_BIT, RDD::MEMORY_ALLOCATION_TYPE_GPU); + buffer_memory += driver->buffer_get_allocation_size(p_texture->shared_fallback->buffer); + + RDG::ResourceTracker *tracker = RDG::resource_tracker_create(); + tracker->buffer_driver_id = p_texture->shared_fallback->buffer; + p_texture->shared_fallback->buffer_tracker = tracker; +} + Vector RenderingDevice::_texture_get_data(Texture *tex, uint32_t p_layer, bool p_2d) { uint32_t width, height, depth; uint32_t tight_mip_size = get_image_format_required_size(tex->format, tex->width, tex->height, p_2d ? 
1 : tex->depth, tex->mipmaps, &width, &height, &depth); @@ -1535,6 +1800,9 @@ Error RenderingDevice::texture_copy(RID p_from_texture, RID p_to_texture, const copy_region.size = p_size; + // Indicate the texture will get modified for the shared texture fallback. + _texture_update_shared_fallback(p_to_texture, dst_tex, true); + // The textures must be mutable to be used in the copy operation. bool src_made_mutable = _texture_make_mutable(src_tex, p_from_texture); bool dst_made_mutable = _texture_make_mutable(dst_tex, p_to_texture); @@ -1578,6 +1846,9 @@ Error RenderingDevice::texture_resolve_multisample(RID p_from_texture, RID p_to_ ERR_FAIL_COND_V_MSG(src_tex->read_aspect_flags != dst_tex->read_aspect_flags, ERR_INVALID_PARAMETER, "Source and destination texture must be of the same type (color or depth)."); + // Indicate the texture will get modified for the shared texture fallback. + _texture_update_shared_fallback(p_to_texture, dst_tex, true); + // The textures must be mutable to be used in the resolve operation. bool src_made_mutable = _texture_make_mutable(src_tex, p_from_texture); bool dst_made_mutable = _texture_make_mutable(dst_tex, p_to_texture); @@ -1620,6 +1891,9 @@ Error RenderingDevice::texture_clear(RID p_texture, const Color &p_color, uint32 range.base_layer = src_tex->base_layer + p_base_layer; range.layer_count = p_layers; + // Indicate the texture will get modified for the shared texture fallback. + _texture_update_shared_fallback(p_texture, src_tex, true); + if (_texture_make_mutable(src_tex, p_texture)) { // The texture must be mutable to be used as a clear destination. 
draw_graph.add_synchronization(); @@ -2526,6 +2800,14 @@ RID RenderingDevice::uniform_buffer_create(uint32_t p_size_bytes, const Vectorshared_textures_to_update) { + Texture *texture = texture_owner.get_or_null(shared.texture); + ERR_CONTINUE(texture == nullptr); + _texture_update_shared_fallback(shared.texture, texture, shared.writing); + } +} + RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p_shader, uint32_t p_shader_set) { _THREAD_SAFE_METHOD_ @@ -2554,6 +2836,7 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p Vector draw_trackers; Vector draw_trackers_usage; HashMap untracked_usage; + Vector shared_textures_to_update; for (uint32_t i = 0; i < set_uniform_count; i++) { const ShaderUniform &set_uniform = set_uniforms[i]; @@ -2619,8 +2902,16 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p attachable_textures.push_back(attachable_texture); } - if (texture->draw_tracker != nullptr) { - draw_trackers.push_back(texture->draw_tracker); + RDD::TextureID driver_id = texture->driver_id; + RDG::ResourceTracker *tracker = texture->draw_tracker; + if (texture->shared_fallback != nullptr && texture->shared_fallback->texture.id != 0) { + driver_id = texture->shared_fallback->texture; + tracker = texture->shared_fallback->texture_tracker; + shared_textures_to_update.push_back({ false, texture_id }); + } + + if (tracker != nullptr) { + draw_trackers.push_back(tracker); draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_TEXTURE_SAMPLE); } else { untracked_usage[texture_id] = RDG::RESOURCE_USAGE_TEXTURE_SAMPLE; @@ -2629,7 +2920,7 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p DEV_ASSERT(!texture->owner.is_valid() || texture_owner.get_or_null(texture->owner)); driver_uniform.ids.push_back(*sampler_driver_id); - driver_uniform.ids.push_back(texture->driver_id); + driver_uniform.ids.push_back(driver_id); } } break; case UNIFORM_TYPE_TEXTURE: { @@ -2656,8 +2947,16 @@ RID 
RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p attachable_textures.push_back(attachable_texture); } - if (texture->draw_tracker != nullptr) { - draw_trackers.push_back(texture->draw_tracker); + RDD::TextureID driver_id = texture->driver_id; + RDG::ResourceTracker *tracker = texture->draw_tracker; + if (texture->shared_fallback != nullptr && texture->shared_fallback->texture.id != 0) { + driver_id = texture->shared_fallback->texture; + tracker = texture->shared_fallback->texture_tracker; + shared_textures_to_update.push_back({ false, texture_id }); + } + + if (tracker != nullptr) { + draw_trackers.push_back(tracker); draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_TEXTURE_SAMPLE); } else { untracked_usage[texture_id] = RDG::RESOURCE_USAGE_TEXTURE_SAMPLE; @@ -2665,7 +2964,7 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p DEV_ASSERT(!texture->owner.is_valid() || texture_owner.get_or_null(texture->owner)); - driver_uniform.ids.push_back(texture->driver_id); + driver_uniform.ids.push_back(driver_id); } } break; case UNIFORM_TYPE_IMAGE: { @@ -2687,6 +2986,10 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT), RID(), "Image (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") needs the TEXTURE_USAGE_STORAGE_BIT usage flag set in order to be used as uniform."); + if (texture->owner.is_null() && texture->shared_fallback != nullptr) { + shared_textures_to_update.push_back({ true, texture_id }); + } + if (_texture_make_mutable(texture, texture_id)) { // The texture must be mutable as a layout transition will be required. 
draw_graph.add_synchronization(); @@ -2872,6 +3175,7 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p uniform_set.draw_trackers = draw_trackers; uniform_set.draw_trackers_usage = draw_trackers_usage; uniform_set.untracked_usage = untracked_usage; + uniform_set.shared_textures_to_update = shared_textures_to_update; uniform_set.shader_set = p_shader_set; uniform_set.shader_id = p_shader; @@ -3344,12 +3648,16 @@ Error RenderingDevice::_draw_list_render_pass_begin(Framebuffer *p_framebuffer, for (int i = 0; i < p_framebuffer->texture_ids.size(); i++) { RDD::RenderPassClearValue clear_value; - Texture *texture = texture_owner.get_or_null(p_framebuffer->texture_ids[i]); + RID texture_rid = p_framebuffer->texture_ids[i]; + Texture *texture = texture_owner.get_or_null(texture_rid); if (!texture) { color_index++; continue; } + // Indicate the texture will get modified for the shared texture fallback. + _texture_update_shared_fallback(texture_rid, texture, true); + if (texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { if (color_index < p_clear_colors.size()) { ERR_FAIL_INDEX_V(color_index, p_clear_colors.size(), ERR_BUG); // A bug. 
@@ -3813,6 +4121,8 @@ void RenderingDevice::draw_list_draw(DrawListID p_list, bool p_use_indices, uint draw_graph.add_draw_list_bind_uniform_set(dl->state.pipeline_shader_driver_id, dl->state.sets[i].uniform_set_driver_id, i); UniformSet *uniform_set = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set); + _uniform_set_update_shared(uniform_set); + draw_graph.add_draw_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage); dl->state.sets[i].bound = true; @@ -4219,6 +4529,8 @@ void RenderingDevice::compute_list_dispatch(ComputeListID p_list, uint32_t p_x_g draw_graph.add_compute_list_bind_uniform_set(cl->state.pipeline_shader_driver_id, cl->state.sets[i].uniform_set_driver_id, i); UniformSet *uniform_set = uniform_set_owner.get_or_null(cl->state.sets[i].uniform_set); + _uniform_set_update_shared(uniform_set); + draw_graph.add_compute_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage); cl->state.sets[i].bound = true; @@ -4326,6 +4638,8 @@ void RenderingDevice::compute_list_dispatch_indirect(ComputeListID p_list, RID p draw_graph.add_compute_list_bind_uniform_set(cl->state.pipeline_shader_driver_id, cl->state.sets[i].uniform_set_driver_id, i); UniformSet *uniform_set = uniform_set_owner.get_or_null(cl->state.sets[i].uniform_set); + _uniform_set_update_shared(uniform_set); + draw_graph.add_compute_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage); cl->state.sets[i].bound = true; @@ -4417,6 +4731,7 @@ bool RenderingDevice::_texture_make_mutable(Texture *p_texture, RID p_texture_id draw_tracker->parent = owner_texture->draw_tracker; draw_tracker->texture_driver_id = p_texture->driver_id; draw_tracker->texture_subresources = p_texture->barrier_range(); + draw_tracker->texture_usage = p_texture->usage_flags; draw_tracker->texture_slice_or_dirty_rect = p_texture->slice_rect; owner_texture->slice_trackers[p_texture->slice_rect] = draw_tracker; } @@ -4438,6 +4753,7 @@ bool 
RenderingDevice::_texture_make_mutable(Texture *p_texture, RID p_texture_id p_texture->draw_tracker = RDG::resource_tracker_create(); p_texture->draw_tracker->texture_driver_id = p_texture->driver_id; p_texture->draw_tracker->texture_subresources = p_texture->barrier_range(); + p_texture->draw_tracker->texture_usage = p_texture->usage_flags; p_texture->draw_tracker->reference_count = 1; if (p_texture_id.is_valid()) { @@ -4830,6 +5146,8 @@ void RenderingDevice::_free_pending_resources(int p_frame) { WARN_PRINT("Deleted a texture while it was bound."); } + _texture_free_shared_fallback(texture); + texture_memory -= driver->texture_get_allocation_size(texture->driver_id); driver->texture_free(texture->driver_id); diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 42773fc34716..25ba066cebf1 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -207,6 +207,15 @@ class RenderingDevice : public RenderingDeviceCommons { // for a framebuffer to render into it. 
struct Texture { + struct SharedFallback { + uint32_t revision = 1; + RDD::TextureID texture; + RDG::ResourceTracker *texture_tracker = nullptr; + RDD::BufferID buffer; + RDG::ResourceTracker *buffer_tracker = nullptr; + bool raw_reinterpretation = false; + }; + RDD::TextureID driver_id; TextureType type = TEXTURE_TYPE_MAX; @@ -235,6 +244,7 @@ class RenderingDevice : public RenderingDeviceCommons { RDG::ResourceTracker *draw_tracker = nullptr; HashMap slice_trackers; + SharedFallback *shared_fallback = nullptr; RDD::TextureSubresourceRange barrier_range() const { RDD::TextureSubresourceRange r; @@ -245,6 +255,22 @@ class RenderingDevice : public RenderingDeviceCommons { r.layer_count = layers; return r; } + + TextureFormat texture_format() const { + TextureFormat tf; + tf.format = format; + tf.width = width; + tf.height = height; + tf.depth = depth; + tf.array_layers = layers; + tf.mipmaps = mipmaps; + tf.texture_type = type; + tf.samples = samples; + tf.usage_bits = usage_flags; + tf.shareable_formats = allowed_shared_formats; + tf.is_resolve_buffer = is_resolve_buffer; + return tf; + } }; RID_Owner texture_owner; @@ -252,6 +278,11 @@ class RenderingDevice : public RenderingDeviceCommons { Vector _texture_get_data(Texture *tex, uint32_t p_layer, bool p_2d = false); Error _texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data, bool p_use_setup_queue, bool p_validate_can_update); + void _texture_check_shared_fallback(Texture *p_texture); + void _texture_update_shared_fallback(RID p_texture_rid, Texture *p_texture, bool p_for_writing); + void _texture_free_shared_fallback(Texture *p_texture); + void _texture_copy_shared(RID p_src_texture_rid, Texture *p_src_texture, RID p_dst_texture_rid, Texture *p_dst_texture); + void _texture_create_reinterpret_buffer(Texture *p_texture); public: struct TextureView { @@ -916,16 +947,24 @@ class RenderingDevice : public RenderingDeviceCommons { RID texture; }; + struct SharedTexture { + uint32_t writing = 0; + RID 
texture; + }; + LocalVector attachable_textures; // Used for validation. Vector draw_trackers; Vector draw_trackers_usage; HashMap untracked_usage; + LocalVector shared_textures_to_update; InvalidationCallback invalidated_callback = nullptr; void *invalidated_callback_userdata = nullptr; }; RID_Owner uniform_set_owner; + void _uniform_set_update_shared(UniformSet *p_uniform_set); + public: RID uniform_set_create(const Vector &p_uniforms, RID p_shader, uint32_t p_shader_set); bool uniform_set_is_valid(RID p_uniform_set); diff --git a/servers/rendering/rendering_device_driver.cpp b/servers/rendering/rendering_device_driver.cpp index be74467340a6..3b8e3efeb8e9 100644 --- a/servers/rendering/rendering_device_driver.cpp +++ b/servers/rendering/rendering_device_driver.cpp @@ -372,6 +372,8 @@ uint64_t RenderingDeviceDriver::api_trait_get(ApiTrait p_trait) { return 1; case API_TRAIT_SECONDARY_VIEWPORT_SCISSOR: return 1; + case API_TRAIT_CLEARS_WITH_COPY_ENGINE: + return true; default: ERR_FAIL_V(0); } diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index e9464ba3218c..51cefb188813 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -220,15 +220,17 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { enum TextureLayout { TEXTURE_LAYOUT_UNDEFINED, - TEXTURE_LAYOUT_GENERAL, + TEXTURE_LAYOUT_STORAGE_OPTIMAL, TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, - TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, - TEXTURE_LAYOUT_PREINITIALIZED, - TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL = 1000164003, + TEXTURE_LAYOUT_COPY_SRC_OPTIMAL, + TEXTURE_LAYOUT_COPY_DST_OPTIMAL, + TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL, + TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL, + TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL, + TEXTURE_LAYOUT_MAX }; 
enum TextureAspect { @@ -284,6 +286,7 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { virtual uint8_t *texture_map(TextureID p_texture, const TextureSubresource &p_subresource) = 0; virtual void texture_unmap(TextureID p_texture) = 0; virtual BitField texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) = 0; + virtual bool texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) = 0; /*****************/ /**** SAMPLER ****/ @@ -317,10 +320,12 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT = (1 << 9), PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT = (1 << 10), PIPELINE_STAGE_COMPUTE_SHADER_BIT = (1 << 11), - PIPELINE_STAGE_TRANSFER_BIT = (1 << 12), + PIPELINE_STAGE_COPY_BIT = (1 << 12), PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT = (1 << 13), + PIPELINE_STAGE_RESOLVE_BIT = (1 << 14), PIPELINE_STAGE_ALL_GRAPHICS_BIT = (1 << 15), PIPELINE_STAGE_ALL_COMMANDS_BIT = (1 << 16), + PIPELINE_STAGE_CLEAR_STORAGE_BIT = (1 << 17), }; enum BarrierAccessBits { @@ -335,13 +340,16 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT = (1 << 8), BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT = (1 << 9), BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT = (1 << 10), - BARRIER_ACCESS_TRANSFER_READ_BIT = (1 << 11), - BARRIER_ACCESS_TRANSFER_WRITE_BIT = (1 << 12), + BARRIER_ACCESS_COPY_READ_BIT = (1 << 11), + BARRIER_ACCESS_COPY_WRITE_BIT = (1 << 12), BARRIER_ACCESS_HOST_READ_BIT = (1 << 13), BARRIER_ACCESS_HOST_WRITE_BIT = (1 << 14), BARRIER_ACCESS_MEMORY_READ_BIT = (1 << 15), BARRIER_ACCESS_MEMORY_WRITE_BIT = (1 << 16), BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT = (1 << 23), + BARRIER_ACCESS_RESOLVE_READ_BIT = (1 << 24), + BARRIER_ACCESS_RESOLVE_WRITE_BIT = (1 << 25), + BARRIER_ACCESS_STORAGE_CLEAR_BIT = (1 << 27), }; struct MemoryBarrier { @@ -735,7 +743,9 @@ class 
RenderingDeviceDriver : public RenderingDeviceCommons { API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT, API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP, API_TRAIT_SECONDARY_VIEWPORT_SCISSOR, + API_TRAIT_CLEARS_WITH_COPY_ENGINE, }; + enum ShaderChangeInvalidation { SHADER_CHANGE_INVALIDATION_ALL_BOUND_UNIFORM_SETS, // What Vulkan does. diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp index c7de5c67cb3e..221ec72e4a43 100644 --- a/servers/rendering/rendering_device_graph.cpp +++ b/servers/rendering/rendering_device_graph.cpp @@ -36,7 +36,8 @@ #define PRINT_COMMAND_RECORDING 0 RenderingDeviceGraph::RenderingDeviceGraph() { - // Default initialization. + driver_honors_barriers = false; + driver_clears_with_copy_engine = false; } RenderingDeviceGraph::~RenderingDeviceGraph() { @@ -44,7 +45,8 @@ RenderingDeviceGraph::~RenderingDeviceGraph() { bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) { switch (p_usage) { - case RESOURCE_USAGE_TRANSFER_FROM: + case RESOURCE_USAGE_COPY_FROM: + case RESOURCE_USAGE_RESOLVE_FROM: case RESOURCE_USAGE_UNIFORM_BUFFER_READ: case RESOURCE_USAGE_INDIRECT_BUFFER_READ: case RESOURCE_USAGE_TEXTURE_BUFFER_READ: @@ -54,7 +56,8 @@ bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) { case RESOURCE_USAGE_TEXTURE_SAMPLE: case RESOURCE_USAGE_STORAGE_IMAGE_READ: return false; - case RESOURCE_USAGE_TRANSFER_TO: + case RESOURCE_USAGE_COPY_TO: + case RESOURCE_USAGE_RESOLVE_TO: case RESOURCE_USAGE_TEXTURE_BUFFER_READ_WRITE: case RESOURCE_USAGE_STORAGE_BUFFER_READ_WRITE: case RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE: @@ -69,15 +72,19 @@ bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) { RDD::TextureLayout RenderingDeviceGraph::_usage_to_image_layout(ResourceUsage p_usage) { switch (p_usage) { - case RESOURCE_USAGE_TRANSFER_FROM: - return RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL; - case RESOURCE_USAGE_TRANSFER_TO: - return RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL; + case 
RESOURCE_USAGE_COPY_FROM: + return RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL; + case RESOURCE_USAGE_COPY_TO: + return RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL; + case RESOURCE_USAGE_RESOLVE_FROM: + return RDD::TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL; + case RESOURCE_USAGE_RESOLVE_TO: + return RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL; case RESOURCE_USAGE_TEXTURE_SAMPLE: return RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; case RESOURCE_USAGE_STORAGE_IMAGE_READ: case RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE: - return RDD::TEXTURE_LAYOUT_GENERAL; + return RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL; case RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE: return RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; case RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE: @@ -97,10 +104,14 @@ RDD::BarrierAccessBits RenderingDeviceGraph::_usage_to_access_bits(ResourceUsage switch (p_usage) { case RESOURCE_USAGE_NONE: return RDD::BarrierAccessBits(0); - case RESOURCE_USAGE_TRANSFER_FROM: - return RDD::BARRIER_ACCESS_TRANSFER_READ_BIT; - case RESOURCE_USAGE_TRANSFER_TO: - return RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; + case RESOURCE_USAGE_COPY_FROM: + return RDD::BARRIER_ACCESS_COPY_READ_BIT; + case RESOURCE_USAGE_COPY_TO: + return RDD::BARRIER_ACCESS_COPY_WRITE_BIT; + case RESOURCE_USAGE_RESOLVE_FROM: + return RDD::BARRIER_ACCESS_RESOLVE_READ_BIT; + case RESOURCE_USAGE_RESOLVE_TO: + return RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT; case RESOURCE_USAGE_UNIFORM_BUFFER_READ: return RDD::BARRIER_ACCESS_UNIFORM_READ_BIT; case RESOURCE_USAGE_INDIRECT_BUFFER_READ: @@ -818,26 +829,27 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC } break; case RecordedCommand::TYPE_TEXTURE_CLEAR: { const RecordedTextureClearCommand *texture_clear_command = reinterpret_cast(command); - driver->command_clear_color_texture(r_command_buffer, texture_clear_command->texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_clear_command->color, texture_clear_command->range); + 
driver->command_clear_color_texture(r_command_buffer, texture_clear_command->texture, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, texture_clear_command->color, texture_clear_command->range); } break; case RecordedCommand::TYPE_TEXTURE_COPY: { const RecordedTextureCopyCommand *texture_copy_command = reinterpret_cast(command); - driver->command_copy_texture(r_command_buffer, texture_copy_command->from_texture, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture_copy_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_copy_command->region); + const VectorView command_texture_copy_regions_view(texture_copy_command->texture_copy_regions(), texture_copy_command->texture_copy_regions_count); + driver->command_copy_texture(r_command_buffer, texture_copy_command->from_texture, RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL, texture_copy_command->to_texture, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, command_texture_copy_regions_view); } break; case RecordedCommand::TYPE_TEXTURE_GET_DATA: { const RecordedTextureGetDataCommand *texture_get_data_command = reinterpret_cast(command); const VectorView command_buffer_texture_copy_regions_view(texture_get_data_command->buffer_texture_copy_regions(), texture_get_data_command->buffer_texture_copy_regions_count); - driver->command_copy_texture_to_buffer(r_command_buffer, texture_get_data_command->from_texture, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture_get_data_command->to_buffer, command_buffer_texture_copy_regions_view); + driver->command_copy_texture_to_buffer(r_command_buffer, texture_get_data_command->from_texture, RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL, texture_get_data_command->to_buffer, command_buffer_texture_copy_regions_view); } break; case RecordedCommand::TYPE_TEXTURE_RESOLVE: { const RecordedTextureResolveCommand *texture_resolve_command = reinterpret_cast(command); - driver->command_resolve_texture(r_command_buffer, texture_resolve_command->from_texture, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, 
texture_resolve_command->src_layer, texture_resolve_command->src_mipmap, texture_resolve_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_resolve_command->dst_layer, texture_resolve_command->dst_mipmap); + driver->command_resolve_texture(r_command_buffer, texture_resolve_command->from_texture, RDD::TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL, texture_resolve_command->src_layer, texture_resolve_command->src_mipmap, texture_resolve_command->to_texture, RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL, texture_resolve_command->dst_layer, texture_resolve_command->dst_mipmap); } break; case RecordedCommand::TYPE_TEXTURE_UPDATE: { const RecordedTextureUpdateCommand *texture_update_command = reinterpret_cast(command); const RecordedBufferToTextureCopy *command_buffer_to_texture_copies = texture_update_command->buffer_to_texture_copies(); for (uint32_t j = 0; j < texture_update_command->buffer_to_texture_copies_count; j++) { - driver->command_copy_buffer_to_texture(r_command_buffer, command_buffer_to_texture_copies[j].from_buffer, texture_update_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, command_buffer_to_texture_copies[j].region); + driver->command_copy_buffer_to_texture(r_command_buffer, command_buffer_to_texture_copies[j].from_buffer, texture_update_command->to_texture, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, command_buffer_to_texture_copies[j].region); } } break; case RecordedCommand::TYPE_CAPTURE_TIMESTAMP: { @@ -1271,6 +1283,7 @@ void RenderingDeviceGraph::initialize(RDD *p_driver, RenderingContextDriver::Dev } driver_honors_barriers = driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS); + driver_clears_with_copy_engine = driver->api_trait_get(RDD::API_TRAIT_CLEARS_WITH_COPY_ENGINE); } void RenderingDeviceGraph::finalize() { @@ -1321,12 +1334,12 @@ void RenderingDeviceGraph::add_buffer_clear(RDD::BufferID p_dst, ResourceTracker int32_t command_index; RecordedBufferClearCommand *command = 
static_cast(_allocate_command(sizeof(RecordedBufferClearCommand), command_index)); command->type = RecordedCommand::TYPE_BUFFER_CLEAR; - command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT; command->buffer = p_dst; command->offset = p_offset; command->size = p_size; - ResourceUsage usage = RESOURCE_USAGE_TRANSFER_TO; + ResourceUsage usage = RESOURCE_USAGE_COPY_TO; _add_command_to_graph(&p_dst_tracker, &usage, 1, command_index, command); } @@ -1337,13 +1350,13 @@ void RenderingDeviceGraph::add_buffer_copy(RDD::BufferID p_src, ResourceTracker int32_t command_index; RecordedBufferCopyCommand *command = static_cast(_allocate_command(sizeof(RecordedBufferCopyCommand), command_index)); command->type = RecordedCommand::TYPE_BUFFER_COPY; - command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT; command->source = p_src; command->destination = p_dst; command->region = p_region; ResourceTracker *trackers[2] = { p_dst_tracker, p_src_tracker }; - ResourceUsage usages[2] = { RESOURCE_USAGE_TRANSFER_TO, RESOURCE_USAGE_TRANSFER_FROM }; + ResourceUsage usages[2] = { RESOURCE_USAGE_COPY_TO, RESOURCE_USAGE_COPY_FROM }; _add_command_to_graph(trackers, usages, p_src_tracker != nullptr ? 
2 : 1, command_index, command); } @@ -1352,13 +1365,13 @@ void RenderingDeviceGraph::add_buffer_get_data(RDD::BufferID p_src, ResourceTrac int32_t command_index; RecordedBufferGetDataCommand *command = static_cast(_allocate_command(sizeof(RecordedBufferGetDataCommand), command_index)); command->type = RecordedCommand::TYPE_BUFFER_GET_DATA; - command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT; command->source = p_src; command->destination = p_dst; command->region = p_region; if (p_src_tracker != nullptr) { - ResourceUsage usage = RESOURCE_USAGE_TRANSFER_FROM; + ResourceUsage usage = RESOURCE_USAGE_COPY_FROM; _add_command_to_graph(&p_src_tracker, &usage, 1, command_index, command); } else { _add_command_to_graph(nullptr, nullptr, 0, command_index, command); @@ -1373,7 +1386,7 @@ void RenderingDeviceGraph::add_buffer_update(RDD::BufferID p_dst, ResourceTracke int32_t command_index; RecordedBufferUpdateCommand *command = static_cast(_allocate_command(command_size, command_index)); command->type = RecordedCommand::TYPE_BUFFER_UPDATE; - command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT; command->destination = p_dst; command->buffer_copies_count = p_buffer_copies.size(); @@ -1382,7 +1395,7 @@ void RenderingDeviceGraph::add_buffer_update(RDD::BufferID p_dst, ResourceTracke buffer_copies[i] = p_buffer_copies[i]; } - ResourceUsage buffer_usage = RESOURCE_USAGE_TRANSFER_TO; + ResourceUsage buffer_usage = RESOURCE_USAGE_COPY_TO; _add_command_to_graph(&p_dst_tracker, &buffer_usage, 1, command_index, command); } @@ -1710,40 +1723,60 @@ void RenderingDeviceGraph::add_texture_clear(RDD::TextureID p_dst, ResourceTrack int32_t command_index; RecordedTextureClearCommand *command = static_cast(_allocate_command(sizeof(RecordedTextureClearCommand), command_index)); command->type = RecordedCommand::TYPE_TEXTURE_CLEAR; - command->self_stages = 
RDD::PIPELINE_STAGE_TRANSFER_BIT; command->texture = p_dst; command->color = p_color; command->range = p_range; - ResourceUsage usage = RESOURCE_USAGE_TRANSFER_TO; + ResourceUsage usage; + if (driver_clears_with_copy_engine) { + command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT; + usage = RESOURCE_USAGE_COPY_TO; + } else { + // If the driver is incapable of using the copy engine for clearing the image (e.g. D3D12), we must either transition the + // resource to a render target or a storage image as those are the only two ways it can perform the operation. + if (p_dst_tracker->texture_usage & RDD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { + command->self_stages = RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + usage = RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE; + } else { + command->self_stages = RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT; + usage = RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE; + } + } + _add_command_to_graph(&p_dst_tracker, &usage, 1, command_index, command); } -void RenderingDeviceGraph::add_texture_copy(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, RDD::TextureCopyRegion p_region) { +void RenderingDeviceGraph::add_texture_copy(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView p_texture_copy_regions) { DEV_ASSERT(p_src_tracker != nullptr); DEV_ASSERT(p_dst_tracker != nullptr); int32_t command_index; - RecordedTextureCopyCommand *command = static_cast(_allocate_command(sizeof(RecordedTextureCopyCommand), command_index)); + uint64_t command_size = sizeof(RecordedTextureCopyCommand) + p_texture_copy_regions.size() * sizeof(RDD::TextureCopyRegion); + RecordedTextureCopyCommand *command = static_cast(_allocate_command(command_size, command_index)); command->type = RecordedCommand::TYPE_TEXTURE_COPY; - command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT; command->from_texture =
p_src; command->to_texture = p_dst; - command->region = p_region; + command->texture_copy_regions_count = p_texture_copy_regions.size(); + + RDD::TextureCopyRegion *texture_copy_regions = command->texture_copy_regions(); + for (uint32_t i = 0; i < command->texture_copy_regions_count; i++) { + texture_copy_regions[i] = p_texture_copy_regions[i]; + } ResourceTracker *trackers[2] = { p_dst_tracker, p_src_tracker }; - ResourceUsage usages[2] = { RESOURCE_USAGE_TRANSFER_TO, RESOURCE_USAGE_TRANSFER_FROM }; + ResourceUsage usages[2] = { RESOURCE_USAGE_COPY_TO, RESOURCE_USAGE_COPY_FROM }; _add_command_to_graph(trackers, usages, 2, command_index, command); } -void RenderingDeviceGraph::add_texture_get_data(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, VectorView p_buffer_texture_copy_regions) { +void RenderingDeviceGraph::add_texture_get_data(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, VectorView p_buffer_texture_copy_regions, ResourceTracker *p_dst_tracker) { DEV_ASSERT(p_src_tracker != nullptr); int32_t command_index; uint64_t command_size = sizeof(RecordedTextureGetDataCommand) + p_buffer_texture_copy_regions.size() * sizeof(RDD::BufferTextureCopyRegion); RecordedTextureGetDataCommand *command = static_cast(_allocate_command(command_size, command_index)); command->type = RecordedCommand::TYPE_TEXTURE_GET_DATA; - command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT; command->from_texture = p_src; command->to_buffer = p_dst; command->buffer_texture_copy_regions_count = p_buffer_texture_copy_regions.size(); @@ -1753,8 +1786,15 @@ void RenderingDeviceGraph::add_texture_get_data(RDD::TextureID p_src, ResourceTr buffer_texture_copy_regions[i] = p_buffer_texture_copy_regions[i]; } - ResourceUsage usage = RESOURCE_USAGE_TRANSFER_FROM; - _add_command_to_graph(&p_src_tracker, &usage, 1, command_index, command); + if (p_dst_tracker != nullptr) { + // Add the 
optional destination tracker if it was provided. + ResourceTracker *trackers[2] = { p_dst_tracker, p_src_tracker }; + ResourceUsage usages[2] = { RESOURCE_USAGE_COPY_TO, RESOURCE_USAGE_COPY_FROM }; + _add_command_to_graph(trackers, usages, 2, command_index, command); + } else { + ResourceUsage usage = RESOURCE_USAGE_COPY_FROM; + _add_command_to_graph(&p_src_tracker, &usage, 1, command_index, command); + } } void RenderingDeviceGraph::add_texture_resolve(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, uint32_t p_src_layer, uint32_t p_src_mipmap, uint32_t p_dst_layer, uint32_t p_dst_mipmap) { @@ -1764,7 +1804,7 @@ void RenderingDeviceGraph::add_texture_resolve(RDD::TextureID p_src, ResourceTra int32_t command_index; RecordedTextureResolveCommand *command = static_cast(_allocate_command(sizeof(RecordedTextureResolveCommand), command_index)); command->type = RecordedCommand::TYPE_TEXTURE_RESOLVE; - command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_RESOLVE_BIT; command->from_texture = p_src; command->to_texture = p_dst; command->src_layer = p_src_layer; @@ -1773,18 +1813,18 @@ void RenderingDeviceGraph::add_texture_resolve(RDD::TextureID p_src, ResourceTra command->dst_mipmap = p_dst_mipmap; ResourceTracker *trackers[2] = { p_dst_tracker, p_src_tracker }; - ResourceUsage usages[2] = { RESOURCE_USAGE_TRANSFER_TO, RESOURCE_USAGE_TRANSFER_FROM }; + ResourceUsage usages[2] = { RESOURCE_USAGE_RESOLVE_TO, RESOURCE_USAGE_RESOLVE_FROM }; _add_command_to_graph(trackers, usages, 2, command_index, command); } -void RenderingDeviceGraph::add_texture_update(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView p_buffer_copies) { +void RenderingDeviceGraph::add_texture_update(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView p_buffer_copies, VectorView p_buffer_trackers) { DEV_ASSERT(p_dst_tracker != nullptr); int32_t command_index; uint64_t 
command_size = sizeof(RecordedTextureUpdateCommand) + p_buffer_copies.size() * sizeof(RecordedBufferToTextureCopy); RecordedTextureUpdateCommand *command = static_cast(_allocate_command(command_size, command_index)); command->type = RecordedCommand::TYPE_TEXTURE_UPDATE; - command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT; command->to_texture = p_dst; command->buffer_to_texture_copies_count = p_buffer_copies.size(); @@ -1793,8 +1833,25 @@ void RenderingDeviceGraph::add_texture_update(RDD::TextureID p_dst, ResourceTrac buffer_to_texture_copies[i] = p_buffer_copies[i]; } - ResourceUsage usage = RESOURCE_USAGE_TRANSFER_TO; - _add_command_to_graph(&p_dst_tracker, &usage, 1, command_index, command); + if (p_buffer_trackers.size() > 0) { + // Add the optional buffer trackers if they were provided. + thread_local LocalVector trackers; + thread_local LocalVector usages; + trackers.clear(); + usages.clear(); + for (uint32_t i = 0; i < p_buffer_trackers.size(); i++) { + trackers.push_back(p_buffer_trackers[i]); + usages.push_back(RESOURCE_USAGE_COPY_FROM); + } + + trackers.push_back(p_dst_tracker); + usages.push_back(RESOURCE_USAGE_COPY_TO); + + _add_command_to_graph(trackers.ptr(), usages.ptr(), trackers.size(), command_index, command); + } else { + ResourceUsage usage = RESOURCE_USAGE_COPY_TO; + _add_command_to_graph(&p_dst_tracker, &usage, 1, command_index, command); + } } void RenderingDeviceGraph::add_capture_timestamp(RDD::QueryPoolID p_query_pool, uint32_t p_index) { diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h index a96382e0cc41..baa15f63f611 100644 --- a/servers/rendering/rendering_device_graph.h +++ b/servers/rendering/rendering_device_graph.h @@ -129,8 +129,10 @@ class RenderingDeviceGraph { enum ResourceUsage { RESOURCE_USAGE_NONE, - RESOURCE_USAGE_TRANSFER_FROM, - RESOURCE_USAGE_TRANSFER_TO, + RESOURCE_USAGE_COPY_FROM, + RESOURCE_USAGE_COPY_TO, + 
RESOURCE_USAGE_RESOLVE_FROM, + RESOURCE_USAGE_RESOLVE_TO, RESOURCE_USAGE_UNIFORM_BUFFER_READ, RESOURCE_USAGE_INDIRECT_BUFFER_READ, RESOURCE_USAGE_TEXTURE_BUFFER_READ, @@ -161,6 +163,7 @@ class RenderingDeviceGraph { RDD::BufferID buffer_driver_id; RDD::TextureID texture_driver_id; RDD::TextureSubresourceRange texture_subresources; + uint32_t texture_usage = 0; int32_t texture_slice_command_index = -1; ResourceTracker *parent = nullptr; ResourceTracker *dirty_shared_list = nullptr; @@ -337,7 +340,15 @@ class RenderingDeviceGraph { struct RecordedTextureCopyCommand : RecordedCommand { RDD::TextureID from_texture; RDD::TextureID to_texture; - RDD::TextureCopyRegion region; + uint32_t texture_copy_regions_count = 0; + + _FORCE_INLINE_ RDD::TextureCopyRegion *texture_copy_regions() { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const RDD::TextureCopyRegion *texture_copy_regions() const { + return reinterpret_cast(&this[1]); + } }; struct RecordedTextureGetDataCommand : RecordedCommand { @@ -596,7 +607,8 @@ class RenderingDeviceGraph { int32_t command_synchronization_index = -1; bool command_synchronization_pending = false; BarrierGroup barrier_group; - bool driver_honors_barriers = false; + bool driver_honors_barriers : 1; + bool driver_clears_with_copy_engine : 1; WorkaroundsState workarounds_state; TightLocalVector frames; uint32_t frame = 0; @@ -672,10 +684,10 @@ class RenderingDeviceGraph { void add_draw_list_usages(VectorView p_trackers, VectorView p_usages); void add_draw_list_end(); void add_texture_clear(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, const Color &p_color, const RDD::TextureSubresourceRange &p_range); - void add_texture_copy(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, RDD::TextureCopyRegion p_region); - void add_texture_get_data(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, VectorView p_buffer_texture_copy_regions); + void 
add_texture_copy(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView p_texture_copy_regions); + void add_texture_get_data(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, VectorView p_buffer_texture_copy_regions, ResourceTracker *p_dst_tracker = nullptr); void add_texture_resolve(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, uint32_t p_src_layer, uint32_t p_src_mipmap, uint32_t p_dst_layer, uint32_t p_dst_mipmap); - void add_texture_update(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView p_buffer_copies); + void add_texture_update(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView p_buffer_copies, VectorView p_buffer_trackers = VectorView()); void add_capture_timestamp(RDD::QueryPoolID p_query_pool, uint32_t p_index); void add_synchronization(); void begin_label(const String &p_label_name, const Color &p_color);