Skip to content

Commit

Permalink
Decode vertex data directly into the vertex pushbuffer, saving a memcpy.
Browse files Browse the repository at this point in the history
  • Loading branch information
hrydgard committed Mar 20, 2016
1 parent 7d89c26 commit e76f369
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 91 deletions.
2 changes: 1 addition & 1 deletion Common/Vulkan/VulkanContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ using namespace std;

static const char *validationLayers[] = {
"VK_LAYER_GOOGLE_unique_objects",
"VK_LAYER_LUNARG_standard_validation",
//"VK_LAYER_LUNARG_standard_validation",
/*
"VK_LAYER_GOOGLE_threading",
"VK_LAYER_LUNARG_draw_state",
Expand Down
2 changes: 1 addition & 1 deletion Common/Vulkan/VulkanImage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ void VulkanTexture::CreateDirect(int w, int h, int numMips, VkFormat format, VkI
assert(res == VK_SUCCESS);
}

void VulkanTexture::UploadMip(int mip, int mipWidth, int mipHeight, VkBuffer buffer, size_t offset, size_t rowLength) {
void VulkanTexture::UploadMip(int mip, int mipWidth, int mipHeight, VkBuffer buffer, uint32_t offset, size_t rowLength) {
VkBufferImageCopy copy_region = {};
copy_region.bufferOffset = offset;
copy_region.bufferRowLength = (uint32_t)rowLength;
Expand Down
2 changes: 1 addition & 1 deletion Common/Vulkan/VulkanImage.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class VulkanTexture {

// Fast uploads from buffer. Mipmaps supported. Usage must at least include VK_IMAGE_USAGE_TRANSFER_DST_BIT in order to use UploadMip.
void CreateDirect(int w, int h, int numMips, VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT);
void UploadMip(int mip, int mipWidth, int mipHeight, VkBuffer buffer, size_t offset, size_t rowLength); // rowLength is in pixels
void UploadMip(int mip, int mipWidth, int mipHeight, VkBuffer buffer, uint32_t offset, size_t rowLength); // rowLength is in pixels
void EndCreate();
int GetNumMips() const { return numMips_; }
void Destroy();
Expand Down
4 changes: 2 additions & 2 deletions Common/Vulkan/VulkanMemory.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,9 @@ class VulkanPushBuffer {
}

// "Zero-copy" variant - you can write the data directly as you compute it.
void *Push(size_t size, size_t *bindOffset, VkBuffer *vkbuf) {
void *Push(size_t size, uint32_t *bindOffset, VkBuffer *vkbuf) {
size_t off = Allocate(size, vkbuf);
*bindOffset = off;
*bindOffset = (uint32_t)off;
return writePtr_ + off;
}

Expand Down
152 changes: 80 additions & 72 deletions GPU/Vulkan/DrawEngineVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,13 @@ enum {
DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan)
:
vulkan_(vulkan),
decodedVerts_(0),
prevPrim_(GE_PRIM_INVALID),
lastVTypeID_(-1),
pipelineManager_(nullptr),
textureCache_(nullptr),
framebufferManager_(nullptr),
numDrawCalls(0),
vertexCountInDrawCalls(0),
decodeCounter_(0),
fboTexNeedBind_(false),
fboTexBound_(false),
curFrame_(0) {
Expand Down Expand Up @@ -298,82 +296,93 @@ void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
}
}

void DrawEngineVulkan::DecodeVerts() {
for (; decodeCounter_ < numDrawCalls; decodeCounter_++) {
DecodeVertsStep(decoded);
}
// Sanity check
if (indexGen.Prim() < 0) {
ERROR_LOG_REPORT(G3D, "DecodeVerts: Failed to deduce prim: %i", indexGen.Prim());
// Force to points (0)
indexGen.AddPrim(GE_PRIM_POINTS, 0);
void DrawEngineVulkan::DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf) {
int decodedVerts = 0;

u8 *dest = decoded;

// Figure out how much pushbuffer space we need to allocate.
if (push) {
int vertsToDecode = 0;
for (int i = 0; i < numDrawCalls; i++) {
const DeferredDrawCall &dc = drawCalls[i];
if (dc.indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
vertsToDecode += dc.indexUpperBound - dc.indexLowerBound + 1;
} else {
vertsToDecode += dc.vertexCount;
}
}
dest = (u8 *)push->Push(vertsToDecode * dec_->GetDecVtxFmt().stride, bindOffset, vkbuf);
}
}

void DrawEngineVulkan::DecodeVertsStep(u8 *decoded) {
const int i = decodeCounter_;
for (int i = 0; i < numDrawCalls; i++) {
const DeferredDrawCall &dc = drawCalls[i];

const DeferredDrawCall &dc = drawCalls[i];
indexGen.SetIndex(decodedVerts);
int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound;

indexGen.SetIndex(decodedVerts_);
int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound;
void *inds = dc.inds;
if (dc.indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
// Decode the verts and apply morphing. Simple.
dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
decodedVerts += indexUpperBound - indexLowerBound + 1;
indexGen.AddPrim(dc.prim, dc.vertexCount);
} else {
// It's fairly common that games issue long sequences of PRIM calls, with differing
// inds pointer but the same base vertex pointer. We'd like to reuse vertices between
// these as much as possible, so we make sure here to combine as many as possible
// into one nice big drawcall, sharing data.

// 1. Look ahead to find the max index, only looking at "matching" drawcalls.
// Expand the lower and upper bounds as we go.
int lastMatch = i;
const int total = numDrawCalls;
for (int j = i + 1; j < total; ++j) {
if (drawCalls[j].verts != dc.verts)
break;

indexLowerBound = std::min(indexLowerBound, (int)drawCalls[j].indexLowerBound);
indexUpperBound = std::max(indexUpperBound, (int)drawCalls[j].indexUpperBound);
lastMatch = j;
}

u32 indexType = dc.indexType;
void *inds = dc.inds;
if (indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
// Decode the verts and apply morphing. Simple.
dec_->DecodeVerts(decoded + decodedVerts_ * (int)dec_->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
decodedVerts_ += indexUpperBound - indexLowerBound + 1;
indexGen.AddPrim(dc.prim, dc.vertexCount);
} else {
// It's fairly common that games issue long sequences of PRIM calls, with differing
// inds pointer but the same base vertex pointer. We'd like to reuse vertices between
// these as much as possible, so we make sure here to combine as many as possible
// into one nice big drawcall, sharing data.

// 1. Look ahead to find the max index, only looking at "matching" drawcalls.
// Expand the lower and upper bounds as we go.
int lastMatch = i;
const int total = numDrawCalls;
for (int j = i + 1; j < total; ++j) {
if (drawCalls[j].verts != dc.verts)
// 2. Loop through the drawcalls, translating indices as we go.
switch (dc.indexType) {
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound);
}
break;
case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16 *)drawCalls[j].inds, indexLowerBound);
}
break;
}

indexLowerBound = std::min(indexLowerBound, (int)drawCalls[j].indexLowerBound);
indexUpperBound = std::max(indexUpperBound, (int)drawCalls[j].indexUpperBound);
lastMatch = j;
}
const int vertexCount = indexUpperBound - indexLowerBound + 1;

// 2. Loop through the drawcalls, translating indices as we go.
switch (indexType) {
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound);
}
break;
case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16 *)drawCalls[j].inds, indexLowerBound);
// This check is a workaround for Pangya Fantasy Golf, which sends bogus index data when switching items in "My Room" sometimes.
if (decodedVerts + vertexCount > VERTEX_BUFFER_MAX) {
return;
}
break;
}

const int vertexCount = indexUpperBound - indexLowerBound + 1;
// 3. Decode that range of vertex data.
dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
decodedVerts += vertexCount;

// This check is a workaround for Pangya Fantasy Golf, which sends bogus index data when switching items in "My Room" sometimes.
if (decodedVerts_ + vertexCount > VERTEX_BUFFER_MAX) {
return;
// 4. Advance indexgen vertex counter.
indexGen.Advance(vertexCount);
i = lastMatch;
}

// 3. Decode that range of vertex data.
dec_->DecodeVerts(decoded + decodedVerts_ * (int)dec_->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
decodedVerts_ += vertexCount;

// 4. Advance indexgen vertex counter.
indexGen.Advance(vertexCount);
decodeCounter_ = lastMatch;
}
// Sanity check
if (indexGen.Prim() < 0) {
ERROR_LOG_REPORT(G3D, "DecodeVerts: Failed to deduce prim: %i", indexGen.Prim());
// Force to points (0)
indexGen.AddPrim(GE_PRIM_POINTS, 0);
}
}

Expand Down Expand Up @@ -520,7 +529,9 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) {
int maxIndex = 0;
bool useElements = true;

DecodeVerts();
// Decode directly into the pushbuffer
VkBuffer vbuf;
DecodeVerts(frame->pushVertex, &vbOffset, &vbuf);
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
useElements = !indexGen.SeenOnlyPurePrims();
vertexCount = indexGen.VertexCount();
Expand Down Expand Up @@ -578,8 +589,6 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) {
vkCmdBindDescriptorSets(cmd_, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout_, 0, 1, &ds, 3, dynamicUBOOffsets);

int stride = dec_->GetDecVtxFmt().stride;
VkBuffer vbuf;
vbOffset = (uint32_t)frame->pushVertex->Push(decoded, indexGen.MaxIndex() * stride, &vbuf);

VkDeviceSize offsets[1] = { vbOffset };
if (useElements) {
Expand All @@ -594,7 +603,8 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) {
vkCmdDraw(cmd_, vertexCount, 1, 0, 0);
}
} else {
DecodeVerts();
// Decode to "decoded"
DecodeVerts(nullptr, nullptr, nullptr);
bool hasColor = (lastVTypeID_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
Expand Down Expand Up @@ -746,10 +756,8 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) {
gpuStats.numVertsSubmitted += vertexCountInDrawCalls;

indexGen.Reset();
decodedVerts_ = 0;
numDrawCalls = 0;
vertexCountInDrawCalls = 0;
decodeCounter_ = 0;
prevPrim_ = GE_PRIM_INVALID;
gstate_c.vertexFullAlpha = true;
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
Expand Down
12 changes: 1 addition & 11 deletions GPU/Vulkan/DrawEngineVulkan.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,6 @@ class DrawEngineVulkan : public DrawEngineCommon {
DoFlush(cmd);
}

void FinishDeferred() {
if (!numDrawCalls)
return;
DecodeVerts();
}

bool IsCodePtrVertexDecoder(const u8 *ptr) const;

void DispatchFlush() override { Flush(cmd_); }
Expand All @@ -148,8 +142,7 @@ class DrawEngineVulkan : public DrawEngineCommon {
void EndFrame();

private:
void DecodeVerts();
void DecodeVertsStep(u8 *decoded);
void DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf);
void DoFlush(VkCommandBuffer cmd);

VkDescriptorSet GetDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, VkBuffer bone);
Expand Down Expand Up @@ -213,7 +206,6 @@ class DrawEngineVulkan : public DrawEngineCommon {

// Vertex collector state
IndexGenerator indexGen;
int decodedVerts_;
GEPrimitiveType prevPrim_;

u32 lastVTypeID_;
Expand All @@ -235,8 +227,6 @@ class DrawEngineVulkan : public DrawEngineCommon {
int numDrawCalls;
int vertexCountInDrawCalls;

int decodeCounter_;

bool fboTexNeedBind_;
bool fboTexBound_;
};
2 changes: 0 additions & 2 deletions GPU/Vulkan/GPU_Vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -672,8 +672,6 @@ void GPU_Vulkan::FastRunLoop(DisplayList &list) {
}

void GPU_Vulkan::FinishDeferred() {
// This finishes reading any vertex data that is pending.
drawEngine_.FinishDeferred();
}

void GPU_Vulkan::ProcessEvent(GPUEvent ev) {
Expand Down
2 changes: 1 addition & 1 deletion GPU/Vulkan/TextureCacheVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1481,7 +1481,7 @@ void TextureCacheVulkan::SetTexture(VulkanPushBuffer *uploadBuffer) {
int bpp = dstFmt == VULKAN_8888_FORMAT ? 4 : 2;
int stride = (mipWidth * bpp + 15) & ~15;
int size = stride * mipHeight;
size_t bufferOffset;
uint32_t bufferOffset;
VkBuffer texBuf;
void *data = uploadBuffer->Push(size, &bufferOffset, &texBuf);
LoadTextureLevel(*entry, (uint8_t *)data, stride, i, replaceImages, scaleFactor, dstFmt);
Expand Down

0 comments on commit e76f369

Please sign in to comment.