Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vulkan: Parallel pipeline creation #16802

Merged
merged 5 commits into from
Feb 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Common/GPU/Vulkan/VulkanQueueRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1336,7 +1336,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
// Maybe a middle pass. But let's try to just block and compile here for now, this doesn't
// happen all that much.
graphicsPipeline->pipeline[(size_t)rpType] = Promise<VkPipeline>::CreateEmpty();
graphicsPipeline->Create(vulkan_, renderPass->Get(vulkan_, rpType, fbSampleCount), rpType, fbSampleCount);
graphicsPipeline->Create(vulkan_, renderPass->Get(vulkan_, rpType, fbSampleCount), rpType, fbSampleCount, time_now_d(), -1);
}

VkPipeline pipeline = graphicsPipeline->pipeline[(size_t)rpType]->BlockUntilReady();
Expand Down
115 changes: 87 additions & 28 deletions Common/GPU/Vulkan/VulkanRenderManager.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <algorithm>
#include <cstdint>

#include <map>
#include <sstream>

#include "Common/Log.h"
Expand All @@ -27,7 +28,7 @@
using namespace PPSSPP_VK;

// renderPass is an example of the "compatibility class" or RenderPassType type.
bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount) {
bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount, double scheduleTime, int countToCompile) {
bool multisample = RenderPassTypeHasMultisample(rpType);
if (multisample) {
if (sampleCount_ != VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM) {
Expand Down Expand Up @@ -118,12 +119,17 @@ bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleR
double start = time_now_d();
VkPipeline vkpipeline;
VkResult result = vkCreateGraphicsPipelines(vulkan->GetDevice(), desc->pipelineCache, 1, &pipe, nullptr, &vkpipeline);
double taken_ms = (time_now_d() - start) * 1000.0;

double now = time_now_d();
double taken_ms_since_scheduling = (now - scheduleTime) * 1000.0;
double taken_ms = (now - start) * 1000.0;

if (taken_ms < 0.1) {
DEBUG_LOG(G3D, "Pipeline creation time: %0.2f ms (fast) rpType: %08x sampleBits: %d (%s)", taken_ms, (u32)rpType, (u32)sampleCount, tag_.c_str());
DEBUG_LOG(G3D, "Pipeline (x/%d) time on %s: %0.2f ms, %0.2f ms since scheduling (fast) rpType: %04x sampleBits: %d (%s)",
countToCompile, GetCurrentThreadName(), taken_ms, taken_ms_since_scheduling, (u32)rpType, (u32)sampleCount, tag_.c_str());
} else {
INFO_LOG(G3D, "Pipeline creation time: %0.2f ms rpType: %08x sampleBits: %d (%s)", taken_ms, (u32)rpType, (u32)sampleCount, tag_.c_str());
INFO_LOG(G3D, "Pipeline (x/%d) time on %s: %0.2f ms, %0.2f ms since scheduling rpType: %04x sampleBits: %d (%s)",
countToCompile, GetCurrentThreadName(), taken_ms, taken_ms_since_scheduling, (u32)rpType, (u32)sampleCount, tag_.c_str());
}

bool success = true;
Expand Down Expand Up @@ -218,26 +224,27 @@ void VKRGraphicsPipeline::LogCreationFailure() const {
ERROR_LOG(G3D, "======== END OF PIPELINE ==========");
}

// NOTE(review): this span is a diff hunk. The removed synchronous Create() and the added
// task-based CreateAsync() are shown interleaved, so it does not compile as displayed.
// Create(): builds the compute pipeline inline and posts the result to the `pipeline` promise.
// CreateAsync(): hands the same work to g_threadManager via pipeline->SpawnEmpty().
bool VKRComputePipeline::Create(VulkanContext *vulkan) {
bool VKRComputePipeline::CreateAsync(VulkanContext *vulkan) {
if (!desc) {
// Already failed to create this one.
return false;
}
VkPipeline vkpipeline;
VkResult result = vkCreateComputePipelines(vulkan->GetDevice(), desc->pipelineCache, 1, &desc->pipe, nullptr, &vkpipeline);

bool success = true;
if (result != VK_SUCCESS) {
pipeline->Post(VK_NULL_HANDLE);
ERROR_LOG(G3D, "Failed creating compute pipeline! result='%s'", VulkanResultToString(result));
success = false;
} else {
pipeline->Post(vkpipeline);
}
// NOTE(review): [=] inside a member function captures `this`, not a copy of `desc`, so the
// lambda reads `desc` through the object at the time it runs (presumably later, on a
// g_threadManager worker - confirm SpawnEmpty's semantics).
pipeline->SpawnEmpty(&g_threadManager, [=] {
VkPipeline vkpipeline;
VkResult result = vkCreateComputePipelines(vulkan->GetDevice(), desc->pipelineCache, 1, &desc->pipe, nullptr, &vkpipeline);

delete desc;
bool success = true;
if (result == VK_SUCCESS) {
return vkpipeline;
} else {
ERROR_LOG(G3D, "Failed creating compute pipeline! result='%s'", VulkanResultToString(result));
success = false;
return (VkPipeline)VK_NULL_HANDLE;
}
// NOTE(review): both branches of the if/else above return, so this delete cannot be reached.
delete desc;
}, TaskType::CPU_COMPUTE);
// NOTE(review): this executes right after the task is spawned. If the lambda runs afterwards it
// would dereference a null `desc`; copying the pointer into a local that the lambda captures
// (and deletes before returning) would avoid that - verify against SpawnEmpty.
desc = nullptr;
return success;
return true;
}

VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan)
Expand Down Expand Up @@ -370,7 +377,6 @@ VulkanRenderManager::~VulkanRenderManager() {

vulkan_->WaitUntilQueueIdle();

DrainCompileQueue();
VkDevice device = vulkan_->GetDevice();
frameDataShared_.Destroy(vulkan_);
for (int i = 0; i < inflightFramesAtStart_; i++) {
Expand All @@ -379,12 +385,43 @@ VulkanRenderManager::~VulkanRenderManager() {
queueRunner_.DestroyDeviceObjects();
}

// One queued graphics-pipeline build: the pipeline object plus the arguments that are
// forwarded to VKRGraphicsPipeline::Create() for it.
// Field order matters: instances are built with positional aggregate initialization.
struct SinglePipelineTask {
// Pipeline object whose Create() will be called.
VKRGraphicsPipeline *pipeline;
// Render pass handed to Create() as the compatible render pass.
VkRenderPass compatibleRenderPass;
// Render pass type handed to Create().
RenderPassType rpType;
// Sample count handed to Create().
VkSampleCountFlagBits sampleCount;
// Timestamp (from time_now_d()) taken when the batch was scheduled; used for logging only.
double scheduleTime;
// Number of entries in the batch this task was scheduled with; used for logging only.
int countToCompile;
};

class CreateMultiPipelinesTask : public Task {
public:
CreateMultiPipelinesTask(VulkanContext *vulkan, std::vector<SinglePipelineTask> tasks) : vulkan_(vulkan), tasks_(tasks) {}
~CreateMultiPipelinesTask() {}

TaskType Type() const override {
return TaskType::CPU_COMPUTE;
}

void Run() override {
for (auto &task : tasks_) {
task.pipeline->Create(vulkan_, task.compatibleRenderPass, task.rpType, task.sampleCount, task.scheduleTime, task.countToCompile);
}
}

VulkanContext *vulkan_;
std::vector<SinglePipelineTask> tasks_;
};

void VulkanRenderManager::CompileThreadFunc() {
SetCurrentThreadName("ShaderCompile");
while (true) {
std::vector<CompileQueueEntry> toCompile;
{
std::unique_lock<std::mutex> lock(compileMutex_);
// TODO: Should this be a while loop? A condition variable can wake up spuriously, so the predicate may need to be re-checked.
// It may be beneficial also to unlock and wait a little bit to see if we get some more shaders
// so we can do a better job of thread-sorting them.
if (compileQueue_.empty() && run_) {
compileCond_.wait(lock);
}
Expand All @@ -395,24 +432,46 @@ void VulkanRenderManager::CompileThreadFunc() {
break;
}

double time = time_now_d();
// TODO: Here we can sort the pending pipelines by vertex and fragment shaders,
// and split up further.
// Those with the same pairs of shaders should be on the same thread.
int countToCompile = (int)toCompile.size();

// Group the pending graphics pipelines by their (vertex shader, fragment shader) pair.
std::map<std::pair<Promise<VkShaderModule> *, Promise<VkShaderModule> *>, std::vector<SinglePipelineTask>> map;

double scheduleTime = time_now_d();

// Here we sort pending graphics pipelines by vertex and fragment shaders, and split up further.
// Those with the same pairs of shaders should be on the same thread, at least on NVIDIA.
// I don't think PowerVR cares though, it doesn't seem to reuse information between the compiles,
// so we might want a different splitting algorithm there.
for (auto &entry : toCompile) {
switch (entry.type) {
case CompileQueueEntry::Type::GRAPHICS:
entry.graphics->Create(vulkan_, entry.compatibleRenderPass, entry.renderPassType, entry.sampleCount);
map[std::pair< Promise<VkShaderModule> *, Promise<VkShaderModule> *>(entry.graphics->desc->vertexShader, entry.graphics->desc->fragmentShader)].push_back(
SinglePipelineTask{
entry.graphics,
entry.compatibleRenderPass,
entry.renderPassType,
entry.sampleCount,
scheduleTime, // these two are for logging purposes.
countToCompile,
}
);
break;
case CompileQueueEntry::Type::COMPUTE:
entry.compute->Create(vulkan_);
// Queue up pending compute pipelines on separate tasks.
entry.compute->CreateAsync(vulkan_);
break;
}
}

double delta = time_now_d() - time;
if (delta > 0.005f) {
INFO_LOG(G3D, "CompileThreadFunc: Creating %d pipelines took %0.3f ms", (int)toCompile.size(), delta * 1000.0f);
for (auto iter : map) {
auto &shaders = iter.first;
auto &entries = iter.second;

// NOTICE_LOG(G3D, "For this shader pair, we have %d pipelines to create", (int)entries.size());

Task *task = new CreateMultiPipelinesTask(vulkan_, entries);
g_threadManager.EnqueueTask(task);
}

queueRunner_.NotifyCompileDone();
Expand Down
5 changes: 3 additions & 2 deletions Common/GPU/Vulkan/VulkanRenderManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ struct VKRGraphicsPipeline {
VKRGraphicsPipeline(PipelineFlags flags, const char *tag) : flags_(flags), tag_(tag) {}
~VKRGraphicsPipeline();

bool Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount);
bool Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount, double scheduleTime, int countToCompile);

void DestroyVariants(VulkanContext *vulkan, bool msaaOnly);

Expand All @@ -137,6 +137,7 @@ struct VKRGraphicsPipeline {
VkSampleCountFlagBits SampleCount() const { return sampleCount_; }

const char *Tag() const { return tag_.c_str(); }

private:
void DestroyVariantsInstant(VkDevice device);

Expand All @@ -153,7 +154,7 @@ struct VKRComputePipeline {
VKRComputePipelineDesc *desc = nullptr;
Promise<VkPipeline> *pipeline = nullptr;

bool Create(VulkanContext *vulkan);
bool CreateAsync(VulkanContext *vulkan);
// Returns true when `desc` is still set but no promise has been assigned to `pipeline` yet.
// `pipeline` is a Promise pointer rather than a Vulkan handle, so it is tested against
// nullptr instead of VK_NULL_HANDLE.
bool Pending() const {
	return pipeline == nullptr && desc != nullptr;
}
Expand Down
2 changes: 1 addition & 1 deletion GPU/Vulkan/ShaderManagerVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ static Promise<VkShaderModule> *CompileShaderModuleAsync(VulkanContext *vulkan,
if (singleThreaded) {
return Promise<VkShaderModule>::AlreadyDone(compile());
} else {
return Promise<VkShaderModule>::Spawn(&g_threadManager, compile, TaskType::CPU_COMPUTE);
return Promise<VkShaderModule>::Spawn(&g_threadManager, compile, TaskType::DEDICATED_THREAD);
}
}

Expand Down