Skip to content

Commit

Permalink
Merge branch 'optick-vk-resync'
Browse files Browse the repository at this point in the history
  • Loading branch information
RobertBeckebans committed Jun 22, 2024
2 parents c5e3917 + 41254b3 commit f539821
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 63 deletions.
6 changes: 1 addition & 5 deletions neo/libs/optick/optick_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,7 @@ static const ProcessID INVALID_PROCESS_ID = (ProcessID)-1;
// Asserts
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if defined(OPTICK_MSVC)
#ifdef UNICODE
#define OPTICK_DEBUG_BREAK(description) OutputDebugString(L"Optick ERROR: " description L"\n"); __debugbreak()
#else
#define OPTICK_DEBUG_BREAK(description) OutputDebugString("Optick ERROR: " description "\n"); __debugbreak()
#endif
#define OPTICK_DEBUG_BREAK(description) OutputDebugString(TEXT("Optick ERROR: ") description TEXT("\n")); __debugbreak()
#elif defined(OPTICK_GCC)
#if __has_builtin(__builtin_debugtrap)
#define OPTICK_DEBUG_BREAK(description) std::cerr << "Optick ERROR: " << description << std::endl; __builtin_debugtrap()
Expand Down
133 changes: 75 additions & 58 deletions neo/libs/optick/optick_gpu.vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,6 @@ namespace Optick
{
class GPUProfilerVulkan : public GPUProfiler
{
private:
VulkanFunctions vulkanFunctions = {};

protected:
struct Frame
{
Expand All @@ -48,7 +45,7 @@ namespace Optick

struct NodePayload
{
VulkanFunctions* vulkanFunctions;
VulkanFunctions vulkanFunctions;
VkDevice device;
VkPhysicalDevice physicalDevice;
VkQueue queue;
Expand Down Expand Up @@ -109,6 +106,8 @@ namespace Optick

void GPUProfilerVulkan::InitDevice(VkInstance instance, VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions)
{
VulkanFunctions vulkanFunctions = {};

if (functions != nullptr)
{
vulkanFunctions = *functions;
Expand All @@ -125,7 +124,7 @@ namespace Optick
(PFN_vkAllocateCommandBuffers_)vkAllocateCommandBuffers,
(PFN_vkCreateFence_)vkCreateFence,
vkCmdResetQueryPool,
vkResetQueryPool,
nullptr, // dynamically define vkResetQueryPool via VK_EXT_host_query_reset extension or Vulkan 1.2 hostQueryReset feature
(PFN_vkCmdWaitEvents_)vkCmdWaitEvents,
(PFN_vkResetEvent_)vkResetEvent,
(PFN_vkSetEvent_)vkSetEvent,
Expand Down Expand Up @@ -196,6 +195,9 @@ namespace Optick
vulkanFunctions.vkCreateFence = (PFN_vkCreateFence_)vkGetDeviceProcAddr_(devices[i], "vkCreateFence");
vulkanFunctions.vkCmdResetQueryPool = (PFN_vkCmdResetQueryPool_)vkGetDeviceProcAddr_(devices[i], "vkCmdResetQueryPool");
vulkanFunctions.vkResetQueryPool = (PFN_vkResetQueryPool_)vkGetDeviceProcAddr_(devices[i], "vkResetQueryPool");
if (!vulkanFunctions.vkResetQueryPool) { // if vkResetQueryPool not defined via Vulkan 1.2, try vkResetQueryPoolEXT
vulkanFunctions.vkResetQueryPool = (PFN_vkResetQueryPool_)vkGetDeviceProcAddr_(devices[i], "vkResetQueryPoolEXT");
}
vulkanFunctions.vkCmdWaitEvents = (PFN_vkCmdWaitEvents_)vkGetDeviceProcAddr_(devices[i], "vkCmdWaitEvents");
vulkanFunctions.vkResetEvent = (PFN_vkResetEvent_)vkGetDeviceProcAddr_(devices[i], "vkResetEvent");
vulkanFunctions.vkSetEvent = (PFN_vkSetEvent_)vkGetDeviceProcAddr_(devices[i], "vkSetEvent");
Expand All @@ -215,19 +217,32 @@ namespace Optick
vulkanFunctions.vkGetPastPresentationTimingGOOGLE = (PFN_vkGetPastPresentationTimingGOOGLE_)vkGetDeviceProcAddr_(devices[i], "vkGetPastPresentationTimingGOOGLE");
}
#if OPTICK_STATIC_VULKAN_FUNCTIONS
else if (!vulkanFunctions.vkGetPastPresentationTimingGOOGLE)
else // this condition can also run if vulkanFunctions are manually-defined via the "functions" parameter and vulkanFunctions.vkGetInstanceProcAddr == nullptr
{
vulkanFunctions.vkGetPastPresentationTimingGOOGLE = (PFN_vkGetPastPresentationTimingGOOGLE_)vkGetDeviceProcAddr(devices[i], "vkGetPastPresentationTimingGOOGLE");
// SRS - First check for nullptr to make sure we don't overwrite any manually-defined function pointers
if (!vulkanFunctions.vkResetQueryPool) {
vulkanFunctions.vkResetQueryPool = (PFN_vkResetQueryPool_)vkGetDeviceProcAddr(devices[i], "vkResetQueryPool");
if (!vulkanFunctions.vkResetQueryPool) { // if vkResetQueryPool not defined via Vulkan 1.2, try vkResetQueryPoolEXT
vulkanFunctions.vkResetQueryPool = (PFN_vkResetQueryPool_)vkGetDeviceProcAddr(devices[i], "vkResetQueryPoolEXT");
}
}

if (!vulkanFunctions.vkGetPastPresentationTimingGOOGLE) {
vulkanFunctions.vkGetPastPresentationTimingGOOGLE = (PFN_vkGetPastPresentationTimingGOOGLE_)vkGetDeviceProcAddr(devices[i], "vkGetPastPresentationTimingGOOGLE");
}
}
#endif
if (!vulkanFunctions.vkResetQueryPool) {
OPTICK_FAILED("vkResetQueryPool must be enabled via VK_EXT_host_query_reset extension or Vulkan 1.2 hostQueryReset feature. Can't initialize GPU Profiler!");
}

VkPhysicalDeviceProperties properties = { 0 };
(*vulkanFunctions.vkGetPhysicalDeviceProperties)(physicalDevices[i], &properties);
GPUProfiler::InitNode(properties.deviceName, i);

NodePayload* nodePayload = Memory::New<NodePayload>();
nodePayloads[i] = nodePayload;
nodePayload->vulkanFunctions = &vulkanFunctions;
nodePayload->vulkanFunctions = vulkanFunctions;
nodePayload->device = devices[i];
nodePayload->physicalDevice = physicalDevices[i];
nodePayload->queue = cmdQueues[i];
Expand Down Expand Up @@ -298,25 +313,27 @@ namespace Optick
{
if (currentState == STATE_RUNNING)
{
uint32_t index = nodes[currentNode]->QueryTimestamp(outCpuTimestamp);
(*vulkanFunctions.vkCmdWriteTimestamp)(commandBuffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, nodePayloads[currentNode]->queryPool, index);
Node& node = *nodes[currentNode];
NodePayload& payload = *nodePayloads[currentNode];

uint32_t index = node.QueryTimestamp(outCpuTimestamp);
(*payload.vulkanFunctions.vkCmdWriteTimestamp)(commandBuffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, payload.queryPool, index);
}
}

void GPUProfilerVulkan::ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count)
{
if (count)
{
Node* node = nodes[nodeIndex];

NodePayload* payload = nodePayloads[nodeIndex];
Node& node = *nodes[nodeIndex];
NodePayload& payload = *nodePayloads[nodeIndex];

OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkGetQueryPoolResults)(payload->device, payload->queryPool, startIndex, count, 8 * (size_t)count, &nodes[nodeIndex]->queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT));
(*vulkanFunctions.vkResetQueryPool)(payload->device, payload->queryPool, startIndex, count);
OPTICK_VK_CHECK((VkResult)(*payload.vulkanFunctions.vkGetQueryPoolResults)(payload.device, payload.queryPool, startIndex, count, 8 * (size_t)count, &node.queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT));
(*payload.vulkanFunctions.vkResetQueryPool)(payload.device, payload.queryPool, startIndex, count);

// Convert GPU timestamps => CPU Timestamps
for (uint32_t index = startIndex; index < startIndex + count; ++index)
*node->queryCpuTimestamps[index] = node->clock.GetCPUTimestamp(node->queryGpuTimestamps[index]);
*node.queryCpuTimestamps[index] = node.clock.GetCPUTimestamp(node.queryGpuTimestamps[index]);
}
}

Expand All @@ -328,7 +345,7 @@ namespace Optick
do
{
NodePayload& payload = *nodePayloads[nodeIndex];
r = (*vulkanFunctions.vkWaitForFences)(nodePayloads[nodeIndex]->device, 1, &payload.frames[frameNumberToWait % payload.frames.size()].fence, 1, 1000 * 30);
r = (*payload.vulkanFunctions.vkWaitForFences)(payload.device, 1, &payload.frames[frameNumberToWait % payload.frames.size()].fence, 1, 1000 * 30);
} while (r != VK_SUCCESS);
}

Expand Down Expand Up @@ -357,15 +374,15 @@ namespace Optick
VkDevice device = payload.device;
VkQueue queue = payload.queue;

(*vulkanFunctions.vkWaitForFences)(device, 1, &fence, 1, (uint64_t)-1);
(*vulkanFunctions.vkResetFences)(device, 1, &fence);
(*payload.vulkanFunctions.vkWaitForFences)(device, 1, &fence, 1, (uint64_t)-1);
(*payload.vulkanFunctions.vkResetFences)(device, 1, &fence);

VkCommandBufferBeginInfo commandBufferBeginInfo;
commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
commandBufferBeginInfo.pNext = 0;
commandBufferBeginInfo.pInheritanceInfo = 0;
commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkBeginCommandBuffer)(commandBuffer, &commandBufferBeginInfo));
OPTICK_VK_CHECK((VkResult)(*payload.vulkanFunctions.vkBeginCommandBuffer)(commandBuffer, &commandBufferBeginInfo));

if (EventData* frameEvent = currentFrame.frameEvent)
QueryTimestamp(commandBuffer, &frameEvent->finish);
Expand All @@ -376,7 +393,7 @@ namespace Optick
QueryTimestamp(commandBuffer, &AddFrameTag().timestamp);
nextFrame.frameEvent = &event;

OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkEndCommandBuffer)(commandBuffer));
OPTICK_VK_CHECK((VkResult)(*payload.vulkanFunctions.vkEndCommandBuffer)(commandBuffer));
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.pNext = nullptr;
Expand All @@ -386,7 +403,7 @@ namespace Optick
submitInfo.pCommandBuffers = &commandBuffer;
submitInfo.signalSemaphoreCount = 0;
submitInfo.pSignalSemaphores = nullptr;
OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkQueueSubmit)(queue, 1, &submitInfo, fence));
OPTICK_VK_CHECK((VkResult)(*payload.vulkanFunctions.vkQueueSubmit)(queue, 1, &submitInfo, fence));

uint32_t queryBegin = currentFrame.queryIndexStart;
uint32_t queryEnd = node.queryIndex;
Expand Down Expand Up @@ -422,16 +439,16 @@ namespace Optick
}

// SRS - Add Vulkan presentation / vsync timing if VK_GOOGLE_display_timing extension available
if (vulkanFunctions.vkGetPastPresentationTimingGOOGLE)
if (payload.vulkanFunctions.vkGetPastPresentationTimingGOOGLE)
{
uint32_t queryPresentTimingCount = 0;
(*vulkanFunctions.vkGetPastPresentationTimingGOOGLE)(device, swapChain, &queryPresentTimingCount, nullptr);
(*payload.vulkanFunctions.vkGetPastPresentationTimingGOOGLE)(device, swapChain, &queryPresentTimingCount, nullptr);
if (queryPresentTimingCount > 0)
{
// Query Presentation Timing / VSync
vector<VkPastPresentationTimingGOOGLE> queryPresentTimings;
queryPresentTimings.resize(queryPresentTimingCount);
(*vulkanFunctions.vkGetPastPresentationTimingGOOGLE)(device, swapChain, &queryPresentTimingCount, &queryPresentTimings[0]);
(*payload.vulkanFunctions.vkGetPastPresentationTimingGOOGLE)(device, swapChain, &queryPresentTimingCount, &queryPresentTimings[0]);
for (uint32_t presentIndex = 0; presentIndex < queryPresentTimingCount; presentIndex++)
{
// Process Presentation Timing / VSync if swap image was actually presented (i.e. not dropped)
Expand Down Expand Up @@ -465,24 +482,24 @@ namespace Optick
{
GPUProfiler::ClockSynchronization clock;

NodePayload& node = *nodePayloads[nodeIndex];
Frame& currentFrame = node.frames[frameNumber % NUM_FRAMES_DELAY];
NodePayload& payload = *nodePayloads[nodeIndex];
Frame& currentFrame = payload.frames[frameNumber % NUM_FRAMES_DELAY];

VkCommandBufferBeginInfo commandBufferBeginInfo;
commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
commandBufferBeginInfo.pNext = 0;
commandBufferBeginInfo.pInheritanceInfo = 0;
commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
VkCommandBuffer CB = currentFrame.commandBuffer;
VkDevice Device = node.device;
VkDevice Device = payload.device;
VkFence Fence = currentFrame.fence;

// SRS - Prepare and submit an empty command buffer to wait on app buffer completion
(*vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
(*vulkanFunctions.vkResetFences)(Device, 1, &Fence);
(*vulkanFunctions.vkResetCommandBuffer)(CB, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
(*vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo);
(*vulkanFunctions.vkEndCommandBuffer)(CB);
(*payload.vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
(*payload.vulkanFunctions.vkResetFences)(Device, 1, &Fence);
(*payload.vulkanFunctions.vkResetCommandBuffer)(CB, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
(*payload.vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo);
(*payload.vulkanFunctions.vkEndCommandBuffer)(CB);

VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
Expand All @@ -493,48 +510,48 @@ namespace Optick
submitInfo.pCommandBuffers = &CB;
submitInfo.signalSemaphoreCount = 0;
submitInfo.pSignalSemaphores = nullptr;
(*vulkanFunctions.vkQueueSubmit)(nodePayloads[nodeIndex]->queue, 1, &submitInfo, Fence);
(*payload.vulkanFunctions.vkQueueSubmit)(payload.queue, 1, &submitInfo, Fence);

// SRS - Prepare and submit the actual command buffer used for clock synchronization
(*vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
(*vulkanFunctions.vkResetFences)(Device, 1, &Fence);
(*vulkanFunctions.vkResetEvent)(Device, nodePayloads[nodeIndex]->event);
(*vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo);
(*vulkanFunctions.vkCmdResetQueryPool)(CB, nodePayloads[nodeIndex]->queryPool, 0, 1);
(*vulkanFunctions.vkCmdWaitEvents)(CB, 1, &nodePayloads[nodeIndex]->event, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_HOST_BIT | VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, nullptr, 0, nullptr, 0, nullptr);
(*vulkanFunctions.vkCmdWriteTimestamp)(CB, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, nodePayloads[nodeIndex]->queryPool, 0);
(*vulkanFunctions.vkEndCommandBuffer)(CB);
(*vulkanFunctions.vkQueueSubmit)(nodePayloads[nodeIndex]->queue, 1, &submitInfo, Fence);
(*payload.vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
(*payload.vulkanFunctions.vkResetFences)(Device, 1, &Fence);
(*payload.vulkanFunctions.vkResetEvent)(Device, payload.event);
(*payload.vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo);
(*payload.vulkanFunctions.vkCmdResetQueryPool)(CB, payload.queryPool, 0, 1);
(*payload.vulkanFunctions.vkCmdWaitEvents)(CB, 1, &payload.event, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_HOST_BIT | VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, nullptr, 0, nullptr, 0, nullptr);
(*payload.vulkanFunctions.vkCmdWriteTimestamp)(CB, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, payload.queryPool, 0);
(*payload.vulkanFunctions.vkEndCommandBuffer)(CB);
(*payload.vulkanFunctions.vkQueueSubmit)(payload.queue, 1, &submitInfo, Fence);

// SRS - Improve GPU to CPU clock offset calibration by using Vulkan events
// thanks to cdwfs for concept at https://gist.github.com/cdwfs/4222ca09cb259f8dd50f7f2cf7d09179
(*vulkanFunctions.vkSetEvent)(Device, nodePayloads[nodeIndex]->event);
(*payload.vulkanFunctions.vkSetEvent)(Device, payload.event);
clock.timestampCPU = GetHighPrecisionTime();
(*vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
(*vulkanFunctions.vkResetFences)(Device, 1, &Fence);
(*payload.vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
(*payload.vulkanFunctions.vkResetFences)(Device, 1, &Fence);
clock.timestampGPU = 0;
(*vulkanFunctions.vkGetQueryPoolResults)(Device, nodePayloads[nodeIndex]->queryPool, 0, 1, 8, &clock.timestampGPU, 8, VK_QUERY_RESULT_64_BIT);
(*payload.vulkanFunctions.vkGetQueryPoolResults)(Device, payload.queryPool, 0, 1, 8, &clock.timestampGPU, 8, VK_QUERY_RESULT_64_BIT);

// SRS - Improve GPU to CPU clock frequency scaling by using floating point doubles
clock.frequencyCPU = GetHighPrecisionFrequency();
VkPhysicalDeviceProperties Properties;
(*vulkanFunctions.vkGetPhysicalDeviceProperties)(nodePayloads[nodeIndex]->physicalDevice, &Properties);
(*payload.vulkanFunctions.vkGetPhysicalDeviceProperties)(payload.physicalDevice, &Properties);
clock.frequencyGPU = (int64_t)(1000000000.0 / (double)Properties.limits.timestampPeriod);

// SRS - Reset entire query pool to clear clock sync query + any leftover queries from previous run
(*vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo);
(*vulkanFunctions.vkCmdResetQueryPool)(CB, nodePayloads[nodeIndex]->queryPool, 0, MAX_QUERIES_COUNT);
(*vulkanFunctions.vkEndCommandBuffer)(CB);
(*vulkanFunctions.vkQueueSubmit)(nodePayloads[nodeIndex]->queue, 1, &submitInfo, Fence);
(*payload.vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo);
(*payload.vulkanFunctions.vkCmdResetQueryPool)(CB, payload.queryPool, 0, MAX_QUERIES_COUNT);
(*payload.vulkanFunctions.vkEndCommandBuffer)(CB);
(*payload.vulkanFunctions.vkQueueSubmit)(payload.queue, 1, &submitInfo, Fence);

return clock;
}

GPUProfilerVulkan::NodePayload::~NodePayload()
{
(*vulkanFunctions->vkDestroyEvent)(device, event, nullptr);
(*vulkanFunctions->vkDestroyCommandPool)(device, commandPool, nullptr);
(*vulkanFunctions->vkDestroyQueryPool)(device, queryPool, nullptr);
(*vulkanFunctions.vkDestroyEvent)(device, event, nullptr);
(*vulkanFunctions.vkDestroyCommandPool)(device, commandPool, nullptr);
(*vulkanFunctions.vkDestroyQueryPool)(device, queryPool, nullptr);
}

GPUProfilerVulkan::~GPUProfilerVulkan()
Expand All @@ -543,8 +560,8 @@ namespace Optick
{
for (Frame& frame : payload->frames)
{
(*vulkanFunctions.vkDestroyFence)(payload->device, frame.fence, nullptr);
(*vulkanFunctions.vkFreeCommandBuffers)(payload->device, payload->commandPool, 1, &frame.commandBuffer);
(*payload->vulkanFunctions.vkDestroyFence)(payload->device, frame.fence, nullptr);
(*payload->vulkanFunctions.vkFreeCommandBuffers)(payload->device, payload->commandPool, 1, &frame.commandBuffer);
}

Memory::Delete(payload);
Expand All @@ -562,5 +579,5 @@ namespace Optick
OPTICK_FAILED("OPTICK_ENABLE_GPU_VULKAN is disabled! Can't initialize GPU Profiler!");
}
}
#endif //OPTICK_ENABLE_GPU_D3D12
#endif //OPTICK_ENABLE_GPU_VULKAN
#endif //USE_OPTICK

0 comments on commit f539821

Please sign in to comment.