diff --git a/core/config/project_settings.cpp b/core/config/project_settings.cpp index 90e2e273206f..226a2c201c82 100644 --- a/core/config/project_settings.cpp +++ b/core/config/project_settings.cpp @@ -1441,6 +1441,8 @@ ProjectSettings::ProjectSettings() { // Keep the enum values in sync with the `DisplayServer::VSyncMode` enum. custom_prop_info["display/window/vsync/vsync_mode"] = PropertyInfo(Variant::INT, "display/window/vsync/vsync_mode", PROPERTY_HINT_ENUM, "Disabled,Enabled,Adaptive,Mailbox"); custom_prop_info["rendering/driver/threads/thread_model"] = PropertyInfo(Variant::INT, "rendering/driver/threads/thread_model", PROPERTY_HINT_ENUM, "Single-Unsafe,Single-Safe,Multi-Threaded"); + GLOBAL_DEF_RST(PropertyInfo(Variant::INT, "display/window/vsync/buffer_count", PROPERTY_HINT_RANGE, "1,3,1"), 2); + GLOBAL_DEF_RST(PropertyInfo(Variant::INT, "display/window/vsync/swapchain_count", PROPERTY_HINT_RANGE, "1,4,1"), 3); GLOBAL_DEF("physics/2d/run_on_separate_thread", false); GLOBAL_DEF("physics/3d/run_on_separate_thread", false); diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml index e80b349da91f..8f9054d7205f 100644 --- a/doc/classes/ProjectSettings.xml +++ b/doc/classes/ProjectSettings.xml @@ -854,6 +854,23 @@ If [code]true[/code] subwindows are embedded in the main window. + + Sets the number of buffers before stalling to wait for the GPU. + 1 may give you the lowest lag/latency but at the high cost of no parallelism between CPU & GPU. + Try the [url=https://darksylinc.github.io/vsync_simulator/]V-Sync Simulator[/url], an interactive interface that simulates presentation to better understand how it is affected by different variables under various conditions. + [b]Note:[/b] This setting is not supported by all APIs. + [b]Note:[/b] This property is only read when the project starts. There is currently no way to change this value at run-time. + + + Sets the number of swapchains (back buffer + front buffer). 
+ [code]2[/code] corresponds to double buffering and [code]3[/code] to triple buffering. A value of [code]1[/code] is not recommended. + Double buffering may give you the lowest lag/latency, but if V-Sync is on and the system can't render at 60 fps, the framerate will drop to integer fractions of it (e.g. 30 fps, 15 fps, 7.5 fps, etc.). Triple buffering gives you a higher framerate (especially if the system can't reach a constant 60 fps) at the cost of up to 1 frame of latency (when V-Sync is on in FIFO mode). + Use double buffering if V-Sync is off. Triple buffering is a must if you plan on using V-Sync MAILBOX mode. + Try the [url=https://darksylinc.github.io/vsync_simulator/]V-Sync Simulator[/url], an interactive interface that simulates presentation to better understand how it is affected by different variables under various conditions. + [b]Note:[/b] This setting is not supported by all APIs. + [b]Note:[/b] This property is only read when the project starts. There is currently no way to change this value at run-time. + [b]Note:[/b] Some platforms may restrict the actual value. + Sets the V-Sync mode for the main game window. See [enum DisplayServer.VSyncMode] for possible values and how they affect the behavior of your application. 
diff --git a/drivers/vulkan/vulkan_context.cpp b/drivers/vulkan/vulkan_context.cpp index 7db2a9cd661a..0221584490e5 100644 --- a/drivers/vulkan/vulkan_context.cpp +++ b/drivers/vulkan/vulkan_context.cpp @@ -1802,7 +1802,10 @@ Error VulkanContext::_create_semaphores() { /*pNext*/ nullptr, /*flags*/ VK_FENCE_CREATE_SIGNALED_BIT }; - for (uint32_t i = 0; i < FRAME_LAG; i++) { + + CRASH_COND_MSG(frame_count == 0, "Used before initialization."); + + for (uint32_t i = 0; i < frame_count; i++) { err = vkCreateFence(device, &fence_ci, nullptr, &fences[i]); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); @@ -1867,6 +1870,9 @@ Error VulkanContext::_window_create(DisplayServer::WindowID p_window_id, Display window.width = p_width; window.height = p_height; window.vsync_mode = p_vsync_mode; + for (size_t i = 0u; i < MAX_FRAME_LAG; ++i) { + window.image_acquired_semaphores[i] = VK_NULL_HANDLE; + } Error err = _update_swap_chain(&window); ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); @@ -1935,8 +1941,14 @@ Error VulkanContext::_clean_up_swap_chain(Window *window) { window->render_pass = VK_NULL_HANDLE; if (window->swapchain_image_resources) { for (uint32_t i = 0; i < swapchainImageCount; i++) { - vkDestroyImageView(device, window->swapchain_image_resources[i].view, nullptr); - vkDestroyFramebuffer(device, window->swapchain_image_resources[i].framebuffer, nullptr); + if (window->swapchain_image_resources[i].view != VK_NULL_HANDLE) { + vkDestroyImageView(device, window->swapchain_image_resources[i].view, nullptr); + window->swapchain_image_resources[i].view = VK_NULL_HANDLE; + } + if (window->swapchain_image_resources[i].framebuffer != VK_NULL_HANDLE) { + vkDestroyFramebuffer(device, window->swapchain_image_resources[i].framebuffer, nullptr); + window->swapchain_image_resources[i].framebuffer = VK_NULL_HANDLE; + } } free(window->swapchain_image_resources); @@ -1944,17 +1956,22 @@ Error VulkanContext::_clean_up_swap_chain(Window *window) { swapchainImageCount = 0; } if 
(separate_present_queue) { - vkDestroyCommandPool(device, window->present_cmd_pool, nullptr); + if (window->present_cmd_pool != VK_NULL_HANDLE) { + vkDestroyCommandPool(device, window->present_cmd_pool, nullptr); + window->present_cmd_pool = VK_NULL_HANDLE; + } } - for (uint32_t i = 0; i < FRAME_LAG; i++) { - // Destroy the semaphores now (we'll re-create it later if we have to). - // We must do this because the semaphore cannot be reused if it's in a signaled state - // (which happens if vkAcquireNextImageKHR returned VK_ERROR_OUT_OF_DATE_KHR or VK_SUBOPTIMAL_KHR) - // The only way to reset it would be to present the swapchain... the one we just destroyed. - // And the API has no way to "unsignal" the semaphore. - vkDestroySemaphore(device, window->image_acquired_semaphores[i], nullptr); - window->image_acquired_semaphores[i] = 0; + for (uint32_t i = 0; i < frame_count; i++) { + if (window->image_acquired_semaphores[i] != VK_NULL_HANDLE) { + // Destroy the semaphore now (we'll re-create it later if we have to). + // We must do this because the semaphore cannot be reused if it's in a signaled state + // (which happens if vkAcquireNextImageKHR returned VK_ERROR_OUT_OF_DATE_KHR or + // VK_SUBOPTIMAL_KHR). The only way to reset it would be to present the swapchain... + // the one we just destroyed. And the API has no way to "unsignal" the semaphore. + vkDestroySemaphore(device, window->image_acquired_semaphores[i], nullptr); + window->image_acquired_semaphores[i] = VK_NULL_HANDLE; + } } return OK; @@ -2082,18 +2099,13 @@ Error VulkanContext::_update_swap_chain(Window *window) { free(presentModes); - // Determine the number of VkImages to use in the swap chain. - // Application desires to acquire 3 images at a time for triple - // buffering. 
- uint32_t desiredNumOfSwapchainImages = 3; - if (desiredNumOfSwapchainImages < surfCapabilities.minImageCount) { - desiredNumOfSwapchainImages = surfCapabilities.minImageCount; - } + uint32_t desiredNumOfSwapchainImages = MAX(surfCapabilities.minImageCount, swapchain_desired_count); // If maxImageCount is 0, we can ask for as many images as we want; // otherwise we're limited to maxImageCount. - if ((surfCapabilities.maxImageCount > 0) && (desiredNumOfSwapchainImages > surfCapabilities.maxImageCount)) { + if (surfCapabilities.maxImageCount != 0u) { // Application must settle for fewer images than desired. - desiredNumOfSwapchainImages = surfCapabilities.maxImageCount; + desiredNumOfSwapchainImages = + MIN(surfCapabilities.maxImageCount, desiredNumOfSwapchainImages); } VkSurfaceTransformFlagsKHR preTransform; @@ -2346,7 +2358,9 @@ Error VulkanContext::_update_swap_chain(Window *window) { /*flags*/ 0, }; - for (uint32_t i = 0; i < FRAME_LAG; i++) { + CRASH_COND_MSG(frame_count == 0, "Used before initialization."); + + for (uint32_t i = 0; i < frame_count; i++) { VkResult vkerr = vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &window->image_acquired_semaphores[i]); ERR_FAIL_COND_V(vkerr, ERR_CANT_CREATE); } @@ -2454,10 +2468,6 @@ Error VulkanContext::prepare_buffers(RDD::CommandBufferID p_command_buffer) { VkResult err; - // Ensure no more than FRAME_LAG renderings are outstanding. 
- vkWaitForFences(device, 1, &fences[frame_index], VK_TRUE, UINT64_MAX); - vkResetFences(device, 1, &fences[frame_index]); - for (KeyValue &E : windows) { Window *w = &E.value; @@ -2567,6 +2577,7 @@ Error VulkanContext::swap_buffers() { submit_info.pCommandBuffers = commands_ptr; submit_info.signalSemaphoreCount = 1; submit_info.pSignalSemaphores = &draw_complete_semaphores[frame_index]; + vkResetFences(device, 1, &fences[frame_index]); err = vkQueueSubmit(graphics_queue, 1, &submit_info, fences[frame_index]); ERR_FAIL_COND_V_MSG(err, ERR_CANT_CREATE, "Vulkan: Cannot submit graphics queue. Error code: " + String(string_VkResult(err))); @@ -2703,7 +2714,12 @@ Error VulkanContext::swap_buffers() { err = fpQueuePresentKHR(present_queue, &present); frame_index += 1; - frame_index %= FRAME_LAG; + frame_index %= frame_count; + + // We must wait for the tail frame_index to finish rendering in the GPU, otherwise its resources + // (GPU memory address ranges + API handles) may still be in use. + // Ideally we'd delay calling this as long as possible; but it's hard to guarantee. + vkWaitForFences(device, 1, &fences[frame_index], VK_TRUE, UINT64_MAX); if (err == VK_ERROR_OUT_OF_DATE_KHR) { // Swapchain is out of date (e.g. the window was resized) and @@ -2882,6 +2898,17 @@ bool VulkanContext::is_debug_utils_enabled() const { } VulkanContext::VulkanContext() { + frame_count = uint8_t(GLOBAL_DEF_RST("display/window/vsync/buffer_count", 2u)); + // TODO: In theory it should be possible to have swapchain_desired_count per window. + // But it may complicate their management. 
+ swapchain_desired_count = uint8_t(GLOBAL_DEF_RST("display/window/vsync/swapchain_count", 3u)); + + CRASH_COND_MSG(frame_count < 1 || frame_count > MAX_FRAME_LAG, + vformat("display/window/vsync/buffer_count %d out of bounds (must be in range [1; %d]).", + frame_count, MAX_FRAME_LAG)); + CRASH_COND_MSG(swapchain_desired_count < 1, + "display/window/vsync/swapchain_count out of bounds (must be in range [1; inf)."); + command_buffer_queue.resize(1); // First one is always the setup command. command_buffer_queue[0] = nullptr; } @@ -2894,7 +2921,7 @@ VulkanContext::~VulkanContext() { free(queue_props); } if (device_initialized) { - for (uint32_t i = 0; i < FRAME_LAG; i++) { + for (uint32_t i = 0; i < frame_count; i++) { vkDestroyFence(device, fences[i], nullptr); vkDestroySemaphore(device, draw_complete_semaphores[i], nullptr); if (separate_present_queue) { diff --git a/drivers/vulkan/vulkan_context.h b/drivers/vulkan/vulkan_context.h index cbb6cf326fa7..d39eecc214e2 100644 --- a/drivers/vulkan/vulkan_context.h +++ b/drivers/vulkan/vulkan_context.h @@ -93,7 +93,7 @@ class VulkanContext : public ApiContextRD { enum { MAX_EXTENSIONS = 128, MAX_LAYERS = 64, - FRAME_LAG = 2 + MAX_FRAME_LAG = 4 }; static VulkanHooks *vulkan_hooks; @@ -131,10 +131,11 @@ class VulkanContext : public ApiContextRD { VkQueue present_queue = VK_NULL_HANDLE; VkColorSpaceKHR color_space; VkFormat format; - VkSemaphore draw_complete_semaphores[FRAME_LAG]; - VkSemaphore image_ownership_semaphores[FRAME_LAG]; - int frame_index = 0; - VkFence fences[FRAME_LAG]; + VkSemaphore draw_complete_semaphores[MAX_FRAME_LAG] = {}; + VkSemaphore image_ownership_semaphores[MAX_FRAME_LAG] = {}; + // See swapchainImageCount. 
+ uint32_t swapchain_desired_count = 0; + VkFence fences[MAX_FRAME_LAG] = {}; VkPhysicalDeviceMemoryProperties memory_properties; VkPhysicalDeviceFeatures physical_device_features; @@ -150,7 +151,7 @@ class VulkanContext : public ApiContextRD { VkSwapchainKHR swapchain = VK_NULL_HANDLE; SwapchainImageResources *swapchain_image_resources = VK_NULL_HANDLE; VkPresentModeKHR presentMode = VK_PRESENT_MODE_FIFO_KHR; - VkSemaphore image_acquired_semaphores[FRAME_LAG]; + VkSemaphore image_acquired_semaphores[MAX_FRAME_LAG] = {}; bool semaphore_acquired = false; uint32_t current_buffer = 0; int width = 0; diff --git a/servers/rendering/renderer_rd/api_context_rd.h b/servers/rendering/renderer_rd/api_context_rd.h index fd3be8060529..fc6abbdd9f7e 100644 --- a/servers/rendering/renderer_rd/api_context_rd.h +++ b/servers/rendering/renderer_rd/api_context_rd.h @@ -35,6 +35,12 @@ #include "servers/rendering/rendering_device_driver.h" class ApiContextRD { +protected: + uint32_t frame_index = 0; + // Initialize to 0 because we don't want it to be used before we initialize and read the config + // (this value must stay constant throughout VulkanContext's & RenderingDevice's lifetime). 
+ uint32_t frame_count = 0; + public: virtual const char *get_api_name() const = 0; virtual RenderingDevice::Capabilities get_device_capabilities() const = 0; @@ -76,6 +82,9 @@ class ApiContextRD { virtual RenderingDeviceDriver *get_driver(RID p_local_device = RID()) = 0; virtual bool is_debug_utils_enabled() const = 0; + uint32_t get_frame_index() const { return frame_index; } + uint32_t get_frame_count() const { return frame_count; } + virtual ~ApiContextRD(); }; diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 8e03796d3381..787399b6c29f 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -190,6 +190,8 @@ Error RenderingDevice::_staging_buffer_allocate(uint32_t p_amount, uint32_t p_re r_alloc_size = p_amount; r_required_action = STAGING_REQUIRED_ACTION_NONE; + const uint32_t frame_count = context->get_frame_count(); + while (true) { r_alloc_offset = 0; @@ -330,6 +332,8 @@ Error RenderingDevice::_buffer_update(Buffer *p_buffer, RID p_buffer_id, size_t thread_local LocalVector command_buffer_copies_vector; command_buffer_copies_vector.clear(); + const uint32_t frame = context->get_frame_index(); + while (to_submit > 0) { uint32_t block_write_offset; uint32_t block_write_amount; @@ -1026,6 +1030,8 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve thread_local LocalVector command_buffer_to_texture_copies_vector; command_buffer_to_texture_copies_vector.clear(); + const uint32_t frame = context->get_frame_index(); + if (p_use_setup_queue && driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { // When using the setup queue directly, we transition the texture to the optimal layout. 
RDD::TextureBarrier tb; @@ -4372,6 +4378,8 @@ void RenderingDevice::_free_internal(RID p_id) { } #endif + const uint32_t frame = context->get_frame_index(); + // Push everything so it's disposed of next time this frame index is processed (means, it's safe to do it). if (texture_owner.owns(p_id)) { Texture *texture = texture_owner.get_or_null(p_id); @@ -4561,6 +4569,8 @@ void RenderingDevice::_finalize_command_buffers(bool p_postpare) { ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work)."); } + const uint32_t frame = context->get_frame_index(); + { // Complete the setup buffer (that needs to be processed before anything else). draw_graph.end(frames[frame].draw_command_buffer, RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS); @@ -4577,6 +4587,8 @@ void RenderingDevice::_finalize_command_buffers(bool p_postpare) { void RenderingDevice::_begin_frame() { draw_graph.begin(); + const uint32_t frame = context->get_frame_index(); + // Erase pending resources. _free_pending_resources(frame); @@ -4630,8 +4642,6 @@ void RenderingDevice::swap_buffers() { context->swap_buffers(); } - frame = (frame + 1) % frame_count; - _begin_frame(); } @@ -4643,6 +4653,8 @@ void RenderingDevice::submit() { _finalize_command_buffers(false); + const uint32_t frame = context->get_frame_index(); + RDD::CommandBufferID command_buffers[2] = { frames[frame].setup_command_buffer, frames[frame].draw_command_buffer }; context->local_device_push_command_buffers(local_device, command_buffers, 2); local_device_processing = true; @@ -4659,7 +4671,7 @@ void RenderingDevice::sync() { local_device_processing = false; } -void RenderingDevice::_free_pending_resources(int p_frame) { +void RenderingDevice::_free_pending_resources(uint32_t p_frame) { // Free in dependency usage order, so nothing weird happens. // Pipelines. 
while (frames[p_frame].render_pipelines_to_dispose_of.front()) { @@ -4743,12 +4755,13 @@ void RenderingDevice::_free_pending_resources(int p_frame) { void RenderingDevice::prepare_screen_for_drawing() { _THREAD_SAFE_METHOD_ + const uint32_t frame = context->get_frame_index(); context->prepare_buffers(frames[frame].draw_command_buffer); screen_prepared = true; } uint32_t RenderingDevice::get_frame_delay() const { - return frame_count; + return context->get_frame_count(); } uint64_t RenderingDevice::get_memory_usage(MemoryType p_type) const { @@ -4774,6 +4787,8 @@ void RenderingDevice::_flush(bool p_current_frame) { return; // Flushing previous frames has no effect with local device. } + const uint32_t frame = context->get_frame_index(); + // Not doing this crashes RADV (undefined behavior). if (p_current_frame) { draw_graph.end(frames[frame].draw_command_buffer, RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS); @@ -4817,18 +4832,15 @@ void RenderingDevice::initialize(ApiContextRD *p_context, bool p_local_device) { device_capabilities = p_context->get_device_capabilities(); if (p_local_device) { - frame_count = 1; local_device = context->local_device_create(); - } else { - frame_count = context->get_swapchain_image_count() + 1; // Always need one extra to ensure it's unused at any time, without having to use a fence for this. } driver = context->get_driver(local_device); max_timestamp_query_elements = 256; + const uint32_t frame_count = context->get_frame_count(); frames.resize(frame_count); - frame = 0; // Create setup and frame buffers. - for (int i = 0; i < frame_count; i++) { + for (uint32_t i = 0; i < frame_count; i++) { frames[i].index = 0; // Create command pool, one per frame is recommended. @@ -4868,7 +4880,7 @@ void RenderingDevice::initialize(ApiContextRD *p_context, bool p_local_device) { } } - for (int i = 0; i < frame_count; i++) { + for (uint32_t i = 0; i < frame_count; i++) { // Reset all queries in a query pool before doing any operations with them. 
driver->command_timestamp_query_pool_reset(frames[0].setup_command_buffer, frames[i].timestamp_pool, max_timestamp_query_elements); } @@ -4893,7 +4905,7 @@ void RenderingDevice::initialize(ApiContextRD *p_context, bool p_local_device) { staging_buffer_current = 0; staging_buffer_used = false; - for (int i = 0; i < frame_count; i++) { + for (uint32_t i = 0; i < frame_count; i++) { // Staging was never used, create a block. Error err = _insert_staging_block(); ERR_CONTINUE(err != OK); @@ -5017,6 +5029,7 @@ void RenderingDevice::_free_rids(T &p_owner, const char *p_type) { void RenderingDevice::capture_timestamp(const String &p_name) { ERR_FAIL_COND_MSG(draw_list != nullptr, "Capturing timestamps during draw list creation is not allowed. Offending timestamp was: " + p_name); ERR_FAIL_COND_MSG(compute_list != nullptr, "Capturing timestamps during compute list creation is not allowed. Offending timestamp was: " + p_name); + const uint32_t frame = context->get_frame_index(); ERR_FAIL_COND(frames[frame].timestamp_count >= max_timestamp_query_elements); draw_graph.add_capture_timestamp(frames[frame].timestamp_pool, frames[frame].timestamp_count); @@ -5095,24 +5108,29 @@ uint64_t RenderingDevice::get_driver_resource(DriverResource p_resource, RID p_r } uint32_t RenderingDevice::get_captured_timestamps_count() const { + const uint32_t frame = context->get_frame_index(); return frames[frame].timestamp_result_count; } uint64_t RenderingDevice::get_captured_timestamps_frame() const { + const uint32_t frame = context->get_frame_index(); return frames[frame].index; } uint64_t RenderingDevice::get_captured_timestamp_gpu_time(uint32_t p_index) const { + const uint32_t frame = context->get_frame_index(); ERR_FAIL_UNSIGNED_INDEX_V(p_index, frames[frame].timestamp_result_count, 0); return driver->timestamp_query_result_to_time(frames[frame].timestamp_result_values[p_index]); } uint64_t RenderingDevice::get_captured_timestamp_cpu_time(uint32_t p_index) const { + const uint32_t frame = 
context->get_frame_index(); ERR_FAIL_UNSIGNED_INDEX_V(p_index, frames[frame].timestamp_result_count, 0); return frames[frame].timestamp_cpu_result_values[p_index]; } String RenderingDevice::get_captured_timestamp_name(uint32_t p_index) const { + const uint32_t frame = context->get_frame_index(); ERR_FAIL_UNSIGNED_INDEX_V(p_index, frames[frame].timestamp_result_count, String()); return frames[frame].timestamp_result_names[p_index]; } @@ -5175,9 +5193,14 @@ void RenderingDevice::finalize() { } } + const uint32_t frame = context->get_frame_index(); + const uint32_t frame_count = context->get_frame_count(); + + DEV_ASSERT(frame_count == frames.size()); + // Free everything pending. - for (uint32_t i = 0; i < frames.size(); i++) { - int f = (frame + i) % frames.size(); + for (uint32_t i = 0; i < frame_count; i++) { + const uint32_t f = (frame + i) % frame_count; _free_pending_resources(f); driver->command_pool_free(frames[i].command_pool); driver->timestamp_query_pool_free(frames[i].timestamp_pool); diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 2ccef6630833..86155a44ac72 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -1273,13 +1273,11 @@ class RenderingDevice : public RenderingDeviceCommons { uint32_t max_timestamp_query_elements = 0; TightLocalVector frames; // Frames available, for main device they are cycled (usually 3), for local devices only 1. - int frame = 0; // Current frame. - int frame_count = 0; // Total amount of frames. uint64_t frames_drawn = 0; RID local_device; bool local_device_processing = false; - void _free_pending_resources(int p_frame); + void _free_pending_resources(uint32_t p_frame); ApiContextRD *context = nullptr;