Fix Vulkan CPU/GPU synchronization

The original code had two bugs: 1. It assumed Swapchain count can be used for proper CPU/GPU synchronization (it's not) 2. It used a double buffer scheme (FRAME_LAG = 2) but the vkWaitForFences/vkResetFences calls were in the wrong order and in the wrong place 3. The swapchain bug forced a quad-buffer scheme with roughly 1 or 2 frames of wait; which hid all the issues This commit gets rid of RenderingDeviceVulkan::frame_count & RenderingDeviceVulkan::frame which should've never existed and makes everything depend on VulkanContext::frame_index & VulkanContext::FRAME_LAG Add ability to tweak FRAME_LAG from within Project Settings Adds new setting display/window/vsync/buffer_count Adds new setting display/window/vsync/swapchain_count See godotengine#80550 for details ------------------------------------------------------------------ Fix comment grammar Use Godot's functions instead of std ------------------------------------------------------------------ Additionally handle inconsistent state when destroying windows: window.image_acquired_semaphores[i] contains uninitialized values. This means that if VulkanContext::_update_swap_chain had failed early (this can happen for valid reasons and due to unexpected failures) then _clean_up_swap_chain will try to destroy an invalid handle. This fix always sets the handle to VK_NULL_HANDLE. Likewise perform the checks in various other resources in _clean_up_swap_chain in case the resources were partially initialized and are now in an inconsistent state. The API should guarantee the call to be valid if the handle was VK_NULL_HANDLE, but I don't trust driver implementations to handle that case. Affects godotengine#81670 Co-authored-by: Clay John <[email protected]>
darksylinc · Jan 14, 2024 · 7f71a42 · 7f71a42
1 parent 26b1fd0
commit 7f71a42
Show file tree

Hide file tree

Showing 7 changed files with 127 additions and 50 deletions.
diff --git a/core/config/project_settings.cpp b/core/config/project_settings.cpp
@@ -1441,6 +1441,8 @@ ProjectSettings::ProjectSettings() {
 	// Keep the enum values in sync with the `DisplayServer::VSyncMode` enum.
 	custom_prop_info["display/window/vsync/vsync_mode"] = PropertyInfo(Variant::INT, "display/window/vsync/vsync_mode", PROPERTY_HINT_ENUM, "Disabled,Enabled,Adaptive,Mailbox");
 	custom_prop_info["rendering/driver/threads/thread_model"] = PropertyInfo(Variant::INT, "rendering/driver/threads/thread_model", PROPERTY_HINT_ENUM, "Single-Unsafe,Single-Safe,Multi-Threaded");
+	GLOBAL_DEF_RST(PropertyInfo(Variant::INT, "display/window/vsync/buffer_count", PROPERTY_HINT_RANGE, "1,3,1"), 2);
+	GLOBAL_DEF_RST(PropertyInfo(Variant::INT, "display/window/vsync/swapchain_count", PROPERTY_HINT_RANGE, "1,4,1"), 3);
 	GLOBAL_DEF("physics/2d/run_on_separate_thread", false);
 	GLOBAL_DEF("physics/3d/run_on_separate_thread", false);
 

diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml
@@ -854,6 +854,23 @@
 		<member name="display/window/subwindows/embed_subwindows" type="bool" setter="" getter="" default="true">
 			If [code]true[/code] subwindows are embedded in the main window.
 		</member>
+		<member name="display/window/vsync/buffer_count" type="int" setter="" getter="" default="2">
+			Sets the number of buffers before stalling to wait for the GPU.
+			1 may give you the lowest lag/latency but at the high cost of no parallelism between CPU &amp; GPU.
+			Try the [url=https://darksylinc.github.io/vsync_simulator/]V-Sync Simulator[/url], an interactive interface that simulates presentation to better understand how it is affected by different variables under various conditions.
+			[b]Note:[/b] This setting is not supported by all APIs.
+			[b]Note:[/b] This property is only read when the project starts. There is currently no way to change this value at run-time.
+		</member>
+		<member name="display/window/vsync/swapchain_count" type="int" setter="" getter="" default="3">
+			Sets the number of swapchains (back buffer + front buffer).
+			[code]2[/code] corresponds to double buffering and [code]3[/code] to triple buffering. A value of [code]1[/code] is not recommended.
+			Double buffering may give you the lowest lag/latency but if VSync is on and the system can't render at 60 fps, the framerate will go down in multiples of it (e.g. 30 fps, 15, 7.5, etc ). Triple buffering gives you higher framerate (specially if the system can't reach a constant 60 fps) at the cost of up to 1 frame of latency (when VSync is On in FIFO mode).
+			Use Double buffering if V-Sync is off. Triple buffering is a must if you plan on using V-Sync MAILBOX mode.
+			Try the [url=https://darksylinc.github.io/vsync_simulator/]V-Sync Simulator[/url], an interactive interface that simulates presentation to better understand how it is affected by different variables under various conditions.
+			[b]Note:[/b] This setting is not supported by all APIs.
+			[b]Note:[/b] This property is only read when the project starts. There is currently no way to change this value at run-time.
+			[b]Note:[/b] Some platforms may restrict the actual value.
+		</member>
 		<member name="display/window/vsync/vsync_mode" type="int" setter="" getter="" default="1">
 			Sets the V-Sync mode for the main game window.
 			See [enum DisplayServer.VSyncMode] for possible values and how they affect the behavior of your application.

diff --git a/drivers/vulkan/vulkan_context.cpp b/drivers/vulkan/vulkan_context.cpp
@@ -1802,7 +1802,10 @@ Error VulkanContext::_create_semaphores() {
 		/*pNext*/ nullptr,
 		/*flags*/ VK_FENCE_CREATE_SIGNALED_BIT
 	};
-	for (uint32_t i = 0; i < FRAME_LAG; i++) {
+
+	CRASH_COND_MSG(frame_count == 0, "Used before initialization.");
+
+	for (uint32_t i = 0; i < frame_count; i++) {
 		err = vkCreateFence(device, &fence_ci, nullptr, &fences[i]);
 		ERR_FAIL_COND_V(err, ERR_CANT_CREATE);
 
@@ -1867,6 +1870,9 @@ Error VulkanContext::_window_create(DisplayServer::WindowID p_window_id, Display
 	window.width = p_width;
 	window.height = p_height;
 	window.vsync_mode = p_vsync_mode;
+	for (size_t i = 0u; i < MAX_FRAME_LAG; ++i) {
+		window.image_acquired_semaphores[i] = VK_NULL_HANDLE;
+	}
 	Error err = _update_swap_chain(&window);
 	ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE);
 
@@ -1935,26 +1941,37 @@ Error VulkanContext::_clean_up_swap_chain(Window *window) {
 	window->render_pass = VK_NULL_HANDLE;
 	if (window->swapchain_image_resources) {
 		for (uint32_t i = 0; i < swapchainImageCount; i++) {
-			vkDestroyImageView(device, window->swapchain_image_resources[i].view, nullptr);
-			vkDestroyFramebuffer(device, window->swapchain_image_resources[i].framebuffer, nullptr);
+			if (window->swapchain_image_resources[i].view != VK_NULL_HANDLE) {
+				vkDestroyImageView(device, window->swapchain_image_resources[i].view, nullptr);
+				window->swapchain_image_resources[i].view = VK_NULL_HANDLE;
+			}
+			if (window->swapchain_image_resources[i].framebuffer != VK_NULL_HANDLE) {
+				vkDestroyFramebuffer(device, window->swapchain_image_resources[i].framebuffer, nullptr);
+				window->swapchain_image_resources[i].framebuffer = VK_NULL_HANDLE;
+			}
 		}
 
 		free(window->swapchain_image_resources);
 		window->swapchain_image_resources = nullptr;
 		swapchainImageCount = 0;
 	}
 	if (separate_present_queue) {
-		vkDestroyCommandPool(device, window->present_cmd_pool, nullptr);
+		if (window->present_cmd_pool != VK_NULL_HANDLE) {
+			vkDestroyCommandPool(device, window->present_cmd_pool, nullptr);
+			window->present_cmd_pool = VK_NULL_HANDLE;
+		}
 	}
 
-	for (uint32_t i = 0; i < FRAME_LAG; i++) {
-		// Destroy the semaphores now (we'll re-create it later if we have to).
-		// We must do this because the semaphore cannot be reused if it's in a signaled state
-		// (which happens if vkAcquireNextImageKHR returned VK_ERROR_OUT_OF_DATE_KHR or VK_SUBOPTIMAL_KHR)
-		// The only way to reset it would be to present the swapchain... the one we just destroyed.
-		// And the API has no way to "unsignal" the semaphore.
-		vkDestroySemaphore(device, window->image_acquired_semaphores[i], nullptr);
-		window->image_acquired_semaphores[i] = 0;
+	for (uint32_t i = 0; i < frame_count; i++) {
+		if (window->image_acquired_semaphores[i] != VK_NULL_HANDLE) {
+			// Destroy the semaphores now (we'll re-create it later if we have to).
+			// We must do this because the semaphore cannot be reused if it's in a signaled state
+			// (which happens if vkAcquireNextImageKHR returned VK_ERROR_OUT_OF_DATE_KHR or
+			// VK_SUBOPTIMAL_KHR) The only way to reset it would be to present the swapchain...
+			// the one we just destroyed. And the API has no way to "unsignal" the semaphore.
+			vkDestroySemaphore(device, window->image_acquired_semaphores[i], nullptr);
+			window->image_acquired_semaphores[i] = VK_NULL_HANDLE;
+		}
 	}
 
 	return OK;
@@ -2082,18 +2099,13 @@ Error VulkanContext::_update_swap_chain(Window *window) {
 
 	free(presentModes);
 
-	// Determine the number of VkImages to use in the swap chain.
-	// Application desires to acquire 3 images at a time for triple
-	// buffering.
-	uint32_t desiredNumOfSwapchainImages = 3;
-	if (desiredNumOfSwapchainImages < surfCapabilities.minImageCount) {
-		desiredNumOfSwapchainImages = surfCapabilities.minImageCount;
-	}
+	uint32_t desiredNumOfSwapchainImages = MAX(surfCapabilities.minImageCount, swapchain_desired_count);
 	// If maxImageCount is 0, we can ask for as many images as we want;
 	// otherwise we're limited to maxImageCount.
-	if ((surfCapabilities.maxImageCount > 0) && (desiredNumOfSwapchainImages > surfCapabilities.maxImageCount)) {
+	if (surfCapabilities.maxImageCount != 0u) {
 		// Application must settle for fewer images than desired.
-		desiredNumOfSwapchainImages = surfCapabilities.maxImageCount;
+		desiredNumOfSwapchainImages =
+				MIN(surfCapabilities.maxImageCount, desiredNumOfSwapchainImages);
 	}
 
 	VkSurfaceTransformFlagsKHR preTransform;
@@ -2346,7 +2358,9 @@ Error VulkanContext::_update_swap_chain(Window *window) {
 		/*flags*/ 0,
 	};
 
-	for (uint32_t i = 0; i < FRAME_LAG; i++) {
+	CRASH_COND_MSG(frame_count == 0, "Used before initialization.");
+
+	for (uint32_t i = 0; i < frame_count; i++) {
 		VkResult vkerr = vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &window->image_acquired_semaphores[i]);
 		ERR_FAIL_COND_V(vkerr, ERR_CANT_CREATE);
 	}
@@ -2454,10 +2468,6 @@ Error VulkanContext::prepare_buffers(RDD::CommandBufferID p_command_buffer) {
 
 	VkResult err;
 
-	// Ensure no more than FRAME_LAG renderings are outstanding.
-	vkWaitForFences(device, 1, &fences[frame_index], VK_TRUE, UINT64_MAX);
-	vkResetFences(device, 1, &fences[frame_index]);
-
 	for (KeyValue<int, Window> &E : windows) {
 		Window *w = &E.value;
 
@@ -2567,6 +2577,7 @@ Error VulkanContext::swap_buffers() {
 	submit_info.pCommandBuffers = commands_ptr;
 	submit_info.signalSemaphoreCount = 1;
 	submit_info.pSignalSemaphores = &draw_complete_semaphores[frame_index];
+	vkResetFences(device, 1, &fences[frame_index]);
 	err = vkQueueSubmit(graphics_queue, 1, &submit_info, fences[frame_index]);
 	ERR_FAIL_COND_V_MSG(err, ERR_CANT_CREATE, "Vulkan: Cannot submit graphics queue. Error code: " + String(string_VkResult(err)));
 
@@ -2703,7 +2714,12 @@ Error VulkanContext::swap_buffers() {
 	err = fpQueuePresentKHR(present_queue, &present);
 
 	frame_index += 1;
-	frame_index %= FRAME_LAG;
+	frame_index %= frame_count;
+
+	// We must wait for the tail frame_index to finish rendering in the GPU, otherwise its resources
+	// (GPU memory address ranges + API handles) may still be in use.
+	// Ideally we'd delay calling this as long as possible; but it's hard to guarantee.
+	vkWaitForFences(device, 1, &fences[frame_index], VK_TRUE, UINT64_MAX);
 
 	if (err == VK_ERROR_OUT_OF_DATE_KHR) {
 		// Swapchain is out of date (e.g. the window was resized) and
@@ -2882,6 +2898,17 @@ bool VulkanContext::is_debug_utils_enabled() const {
 }
 
 VulkanContext::VulkanContext() {
+	frame_count = uint8_t(GLOBAL_DEF_RST("display/window/vsync/buffer_count", 2u));
+	// TODO: In theory it should be possible to have swapchain_desired_count per window.
+	// But it may complicate their management.
+	swapchain_desired_count = uint8_t(GLOBAL_DEF_RST("display/window/vsync/swapchain_count", 3u));
+
+	CRASH_COND_MSG(frame_count < 1 || frame_count > MAX_FRAME_LAG,
+			vformat("display/window/vsync/buffer_count %d out of bounds (must be in range [1; %d).",
+					frame_count, MAX_FRAME_LAG));
+	CRASH_COND_MSG(swapchain_desired_count < 1,
+			"display/window/vsync/swapchain_count out of bounds (must be in range [1; inf).");
+
 	command_buffer_queue.resize(1); // First one is always the setup command.
 	command_buffer_queue[0] = nullptr;
 }
@@ -2894,7 +2921,7 @@ VulkanContext::~VulkanContext() {
 		free(queue_props);
 	}
 	if (device_initialized) {
-		for (uint32_t i = 0; i < FRAME_LAG; i++) {
+		for (uint32_t i = 0; i < frame_count; i++) {
 			vkDestroyFence(device, fences[i], nullptr);
 			vkDestroySemaphore(device, draw_complete_semaphores[i], nullptr);
 			if (separate_present_queue) {

diff --git a/drivers/vulkan/vulkan_context.h b/drivers/vulkan/vulkan_context.h
@@ -93,7 +93,7 @@ class VulkanContext : public ApiContextRD {
 	enum {
 		MAX_EXTENSIONS = 128,
 		MAX_LAYERS = 64,
-		FRAME_LAG = 2
+		MAX_FRAME_LAG = 4
 	};
 
 	static VulkanHooks *vulkan_hooks;
@@ -131,10 +131,11 @@ class VulkanContext : public ApiContextRD {
 	VkQueue present_queue = VK_NULL_HANDLE;
 	VkColorSpaceKHR color_space;
 	VkFormat format;
-	VkSemaphore draw_complete_semaphores[FRAME_LAG];
-	VkSemaphore image_ownership_semaphores[FRAME_LAG];
-	int frame_index = 0;
-	VkFence fences[FRAME_LAG];
+	VkSemaphore draw_complete_semaphores[MAX_FRAME_LAG] = {};
+	VkSemaphore image_ownership_semaphores[MAX_FRAME_LAG] = {};
+	// See swapchainImageCount.
+	uint32_t swapchain_desired_count = 0;
+	VkFence fences[MAX_FRAME_LAG];
 	VkPhysicalDeviceMemoryProperties memory_properties;
 	VkPhysicalDeviceFeatures physical_device_features;
 
@@ -150,7 +151,7 @@ class VulkanContext : public ApiContextRD {
 		VkSwapchainKHR swapchain = VK_NULL_HANDLE;
 		SwapchainImageResources *swapchain_image_resources = VK_NULL_HANDLE;
 		VkPresentModeKHR presentMode = VK_PRESENT_MODE_FIFO_KHR;
-		VkSemaphore image_acquired_semaphores[FRAME_LAG];
+		VkSemaphore image_acquired_semaphores[MAX_FRAME_LAG] = {};
 		bool semaphore_acquired = false;
 		uint32_t current_buffer = 0;
 		int width = 0;

diff --git a/servers/rendering/renderer_rd/api_context_rd.h b/servers/rendering/renderer_rd/api_context_rd.h
@@ -35,6 +35,12 @@
 #include "servers/rendering/rendering_device_driver.h"
 
 class ApiContextRD {
+protected:
+	uint32_t frame_index = 0;
+	// Initialize to 0 because we don't want it to be used before we initialize and read the config
+	// (this value must stay constant throghout VulkanContext's & RenderingDevice's lifetime).
+	uint32_t frame_count = 0;
+
 public:
 	virtual const char *get_api_name() const = 0;
 	virtual RenderingDevice::Capabilities get_device_capabilities() const = 0;
@@ -76,6 +82,9 @@ class ApiContextRD {
 	virtual RenderingDeviceDriver *get_driver(RID p_local_device = RID()) = 0;
 	virtual bool is_debug_utils_enabled() const = 0;
 
+	uint32_t get_frame_index() const { return frame_index; }
+	uint32_t get_frame_count() const { return frame_count; }
+
 	virtual ~ApiContextRD();
 };