Skip to content

Commit

Permalink
Merge pull request #9319 from hrydgard/d3d11-shader-blend
Browse files Browse the repository at this point in the history
D3D11 shader blend and depal
  • Loading branch information
hrydgard authored Feb 17, 2017
2 parents e9d5eb6 + 0616856 commit 3481dae
Show file tree
Hide file tree
Showing 34 changed files with 293 additions and 202 deletions.
3 changes: 1 addition & 2 deletions GPU/Common/DepalettizeShaderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang
if (language == HLSL_D3D11) {
WRITE(p, "SamplerState texSamp : register(s0);\n");
WRITE(p, "Texture2D<float4> tex : register(t0);\n");
WRITE(p, "SamplerState palSamp : register(s1);\n");
WRITE(p, "Texture2D<float4> pal : register(t1);\n");
} else if (language == GLSL_VULKAN) {
WRITE(p, "#version 140\n");
Expand Down Expand Up @@ -121,7 +120,7 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang
}

if (language == HLSL_D3D11) {
WRITE(p, " return pal.Sample(palSamp, float2((float(index) + 0.5) * (1.0 / %f), 0.0));\n", texturePixels);
WRITE(p, " return pal.Load(int3(index, 0, 0));\n");
} else {
WRITE(p, " fragColor0 = texture(pal, vec2((float(index) + 0.5) * (1.0 / %f), 0.0));\n", texturePixels);
}
Expand Down
34 changes: 33 additions & 1 deletion GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@

#define QUAD_INDICES_MAX 65536

DrawEngineCommon::DrawEngineCommon() : dec_(nullptr), decOptions_{} {
// Sets up the state shared by the draw engine backends: clears the vertex decoder
// pointer and decoder options, resets the shader blending flags, and allocates the
// quad index buffer and the vertex decoder JIT cache.
DrawEngineCommon::DrawEngineCommon()
: dec_(nullptr),
decOptions_{},
// Shader blending flags both start out false; ApplyShaderBlending() sets
// fboTexNeedBind_ when a framebuffer texture bind is wanted.
fboTexNeedBind_(false),
fboTexBound_(false) {
// Sized as 6 entries per QUAD_INDICES_MAX - presumably 6 indices (two triangles)
// per generated quad; confirm against the code that fills it.
quadIndices_ = new u16[6 * QUAD_INDICES_MAX];
decJitCache_ = new VertexDecoderJitCache();
}
Expand Down Expand Up @@ -462,3 +466,31 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
// Okay, there we are! Return the new type (but keep the index bits)
return GE_VTYPE_TC_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_NRM_FLOAT | GE_VTYPE_POS_FLOAT | (vertType & (GE_VTYPE_IDX_MASK | GE_VTYPE_THROUGH));
}

// Decides whether blending can be done in the shader for the current draw.
//
// Returns true right away when the GPU feature flags include framebuffer fetch;
// nothing is counted in that case. Otherwise each call counts as one blit for the
// current frame (frames are told apart by gpuStats.numFlips):
//  - while the count is at most twice MAX_REASONABLE_BLITS_PER_FRAME, the framebuffer
//    texture bind is requested, DIRTY_SHADERBLEND is flagged, and true is returned;
//  - beyond that, a one-time warning is logged and false is returned.
// When a new frame starts and the previous count exceeded the reasonable limit, a
// one-time warning is logged and reported.
bool DrawEngineCommon::ApplyShaderBlending() {
	const bool canFetchFramebuffer = (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) != 0;
	if (canFetchFramebuffer)
		return true;

	static const int MAX_REASONABLE_BLITS_PER_FRAME = 24;
	static const int blitHardLimit = MAX_REASONABLE_BLITS_PER_FRAME * 2;

	// Bookkeeping that persists across calls: which frame we are counting, and how far we got.
	static int countedFrame = -1;
	static int blitCount = 0;

	const bool startedNewFrame = countedFrame != gpuStats.numFlips;
	if (startedNewFrame) {
		// Note that blitCount still holds the total from the frame that just ended.
		if (blitCount > MAX_REASONABLE_BLITS_PER_FRAME) {
			WARN_LOG_REPORT_ONCE(blendingBlit, G3D, "Lots of blits needed for obscure blending: %d per frame, blend %d/%d/%d", blitCount, gstate.getBlendFuncA(), gstate.getBlendFuncB(), gstate.getBlendEq());
		}
		countedFrame = gpuStats.numFlips;
		blitCount = 0;
	}

	blitCount++;
	if (blitCount <= blitHardLimit) {
		// Still within budget: ask for the framebuffer texture to be bound and flag the state dirty.
		fboTexNeedBind_ = true;
		gstate_c.Dirty(DIRTY_SHADERBLEND);
		return true;
	}

	WARN_LOG_ONCE(blendingBlit2, G3D, "Skipping additional blits needed for obscure blending: %d per frame, blend %d/%d/%d", blitCount, gstate.getBlendFuncA(), gstate.getBlendFuncB(), gstate.getBlendEq());
	return false;
}
5 changes: 5 additions & 0 deletions GPU/Common/DrawEngineCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class DrawEngineCommon {
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType);

void ApplyClearToMemory(int x1, int y1, int x2, int y2, u32 clearColor);
bool ApplyShaderBlending();

VertexDecoder *GetVertexDecoder(u32 vtype);

Expand Down Expand Up @@ -90,6 +91,10 @@ class DrawEngineCommon {
// Fixed index buffer for easy quad generation from spline/bezier
u16 *quadIndices_;

// Shader blending state
bool fboTexNeedBind_;
bool fboTexBound_;

// Hardware tessellation
int numPatches;
class TessellationDataTransfer {
Expand Down
3 changes: 3 additions & 0 deletions GPU/Common/FramebufferCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,9 @@ class FramebufferManagerCommon {
return true;
}

// Read-only accessor for currentRenderVfb_, the virtual framebuffer currently used as the render target.
// NOTE(review): presumably null when nothing is being rendered to - confirm before dereferencing.
VirtualFramebuffer *GetCurrentRenderVFB() const { return currentRenderVfb_; }
// TODO: Break out into some form of FBO manager
VirtualFramebuffer *GetVFBAt(u32 addr);
VirtualFramebuffer *GetDisplayVFB() {
Expand Down
8 changes: 0 additions & 8 deletions GPU/D3D11/DepalettizeShaderD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,13 @@ DepalShaderCacheD3D11::DepalShaderCacheD3D11(ID3D11Device *device, ID3D11DeviceC
std::vector<uint8_t> vsByteCode;
vertexShader_ = CreateVertexShaderD3D11(device, depalVShaderHLSL, strlen(depalVShaderHLSL), &vsByteCode);
device_->CreateInputLayout(g_DepalVertexElements, ARRAY_SIZE(g_DepalVertexElements), vsByteCode.data(), vsByteCode.size(), &inputLayout_);

D3D11_SAMPLER_DESC sampDesc{};
sampDesc.AddressU = D3D11_TEXTURE_ADDRESS_WRAP;
sampDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
sampDesc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP;
sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT;
device_->CreateSamplerState(&sampDesc, &clutSampler);
}

DepalShaderCacheD3D11::~DepalShaderCacheD3D11() {
Clear();
if (vertexShader_) {
vertexShader_->Release();
}
clutSampler->Release();
}

u32 DepalShaderCacheD3D11::GenerateShaderID(GEPaletteFormat clutFormat, GEBufferFormat pixelFormat) {
Expand Down
2 changes: 0 additions & 2 deletions GPU/D3D11/DepalettizeShaderD3D11.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ class DepalShaderCacheD3D11 {
ID3D11VertexShader *GetDepalettizeVertexShader() { return vertexShader_; }
ID3D11InputLayout *GetInputLayout() { return inputLayout_; }
ID3D11ShaderResourceView *GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut);
ID3D11SamplerState *GetClutSampler() { return clutSampler; }
void Clear();
void Decimate();

Expand All @@ -62,7 +61,6 @@ class DepalShaderCacheD3D11 {
ID3D11DeviceContext *context_;
ID3D11VertexShader *vertexShader_ = nullptr;
ID3D11InputLayout *inputLayout_ = nullptr;
ID3D11SamplerState *clutSampler = nullptr;

std::map<u32, DepalShaderD3D11 *> cache_;
std::map<u32, DepalTextureD3D11 *> texCache_;
Expand Down
4 changes: 1 addition & 3 deletions GPU/D3D11/DrawEngineD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,7 @@ DrawEngineD3D11::DrawEngineD3D11(Draw::DrawContext *draw, ID3D11Device *device,
numDrawCalls(0),
vertexCountInDrawCalls(0),
decodeCounter_(0),
dcid_(0),
fboTexNeedBind_(false),
fboTexBound_(false) {
dcid_(0) {
decOptions_.expandAllWeightsToFloat = true;
decOptions_.expand8BitNormalsToFloat = true;

Expand Down
7 changes: 3 additions & 4 deletions GPU/D3D11/DrawEngineD3D11.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ class DrawEngineD3D11 : public DrawEngineCommon {

void ApplyDrawState(int prim);
void ApplyDrawStateLate(bool applyStencilRef, uint8_t stencilRef);
void ResetShaderBlending();

ID3D11InputLayout *SetupDecFmtForDraw(D3D11VertexShader *vshader, const DecVtxFormat &decFmt, u32 pspFmt);

Expand Down Expand Up @@ -239,16 +240,14 @@ class DrawEngineD3D11 : public DrawEngineCommon {
std::map<uint32_t, ID3D11RasterizerState *> rasterCache_;

// Keep the depth state between ApplyDrawState and ApplyDrawStateLate
ID3D11RasterizerState *rasterState_;
ID3D11BlendState *blendState_;
ID3D11DepthStencilState *depthStencilState_;

// State keys
D3D11StateKeys keys_{};
D3D11DynamicState dynState_{};

// Initial work on shader blending
bool fboTexNeedBind_;
bool fboTexBound_;

// Hardware tessellation
class TessellationDataTransferD3D11 : public TessellationDataTransfer {
private:
Expand Down
88 changes: 64 additions & 24 deletions GPU/D3D11/FramebufferManagerD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ void FramebufferManagerD3D11::ReformatFramebufferFrom(VirtualFramebuffer *vfb, G
D3D11_VIEWPORT vp{ 0.0f, 0.0f, (float)vfb->renderWidth, (float)vfb->renderHeight, 0.0f, 1.0f };
context_->RSSetViewports(1, &vp);
context_->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
context_->Draw(2, 0);
context_->Draw(4, 0);
}

RebindFramebuffer();
Expand Down Expand Up @@ -398,19 +398,14 @@ void FramebufferManagerD3D11::BlitFramebufferDepth(VirtualFramebuffer *src, Virt
}
bool matchingDepthBuffer = src->z_address == dst->z_address && src->z_stride != 0 && dst->z_stride != 0;
bool matchingSize = src->width == dst->width && src->height == dst->height;
if (matchingDepthBuffer && matchingSize) {
// Doesn't work. Use a shader maybe?
draw_->BindBackbufferAsRenderTarget();
bool matchingRenderSize = src->renderWidth == dst->renderWidth && src->renderHeight == dst->renderHeight;
if (matchingDepthBuffer && matchingSize && matchingRenderSize) {
draw_->CopyFramebufferImage(src->fbo, 0, 0, 0, 0, dst->fbo, 0, 0, 0, 0, src->renderWidth, src->renderHeight, 1, Draw::FB_DEPTH_BIT);
RebindFramebuffer();
}
}

void FramebufferManagerD3D11::BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, int flags) {
if (framebuffer == NULL) {
framebuffer = currentRenderVfb_;
}

void FramebufferManagerD3D11::BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags) {
if (!framebuffer->fbo || !useBufferedRendering_) {
ID3D11ShaderResourceView *view = nullptr;
context_->PSSetShaderResources(stage, 1, &view);
Expand All @@ -425,7 +420,7 @@ void FramebufferManagerD3D11::BindFramebufferColor(int stage, VirtualFramebuffer
skipCopy = true;
}
// Currently rendering to this framebuffer. Need to make a copy.
if (!skipCopy && currentRenderVfb_ && framebuffer->fb_address == gstate.getFrameBufRawAddress()) {
if (!skipCopy && framebuffer == currentRenderVfb_) {
// TODO: Maybe merge with bvfbs_? Not sure if those could be packing, and they're created at a different size.
Draw::Framebuffer *renderCopy = GetTempFBO(framebuffer->renderWidth, framebuffer->renderHeight, (Draw::FBColorDepth)framebuffer->colorDepth);
if (renderCopy) {
Expand Down Expand Up @@ -459,8 +454,14 @@ void FramebufferManagerD3D11::BindFramebufferColor(int stage, VirtualFramebuffer
} else {
draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, 0);
}
} else {
} else if (framebuffer != currentRenderVfb_) {
draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, 0);
} else {
// Badness on D3D11 to bind the currently rendered-to framebuffer as a texture.
ID3D11ShaderResourceView *view = nullptr;
context_->PSSetShaderResources(stage, 1, &view);
gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
return;
}
}

Expand Down Expand Up @@ -531,6 +532,53 @@ void FramebufferManagerD3D11::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb)
// Nothing to do here.
}

// Blits a rectangle of the color buffer of src into a rectangle of dest.
//
// dest, destX1/destY1/destX2/destY2: target framebuffer and the corners of the target rectangle.
// src, srcX1/srcY1/srcX2/srcY2: source framebuffer and the corners of the source rectangle.
// linearFilter: picks the linear clamp sampler instead of the point clamp sampler (drawn path only).
//
// If both framebuffers have the same dimensions and both rectangles have the same size, this
// becomes a plain CopyFramebufferImage. Otherwise a textured quad is drawn into dest using the
// 2D shader, with culling, blending and depth/stencil disabled.
// If the quad vertex buffer cannot be mapped, an error is logged and nothing is drawn.
void FramebufferManagerD3D11::SimpleBlit(
	Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2,
	Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2, bool linearFilter) {

	int destW, destH, srcW, srcH;
	draw_->GetFramebufferDimensions(src, &srcW, &srcH);
	draw_->GetFramebufferDimensions(dest, &destW, &destH);

	if (srcW == destW && srcH == destH && destX2 - destX1 == srcX2 - srcX1 && destY2 - destY1 == srcY2 - srcY1) {
		// Optimize to a copy
		draw_->CopyFramebufferImage(src, 0, (int)srcX1, (int)srcY1, 0, dest, 0, (int)destX1, (int)destY1, 0, (int)(srcX2 - srcX1), (int)(srcY2 - srcY1), 1, Draw::FB_COLOR_BIT);
		return;
	}

	// Scale factors that turn the incoming coordinates into fractions of each framebuffer's size.
	float dX = 1.0f / (float)destW;
	float dY = 1.0f / (float)destH;
	float sX = 1.0f / (float)srcW;
	float sY = 1.0f / (float)srcH;
	struct Vtx {
		float x, y, z, u, v;
	};
	// Four corners in triangle strip order.
	// NOTE(review): positions end up as 0..1 fractions of dest rather than -1..1 clip space, and no
	// viewport is set here. Confirm that the shader bound by Bind2DShader() and the current viewport
	// state expect this.
	Vtx vtx[4] = {
		{ dX * destX1, dY * destY1, 0.0f, sX * srcX1, sY * srcY1 },
		{ dX * destX2, dY * destY1, 0.0f, sX * srcX2, sY * srcY1 },
		{ dX * destX1, dY * destY2, 0.0f, sX * srcX1, sY * srcY2 },
		{ dX * destX2, dY * destY2, 0.0f, sX * srcX2, sY * srcY2 },
	};

	D3D11_MAPPED_SUBRESOURCE map;
	HRESULT hr = context_->Map(quadBuffer_, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
	if (FAILED(hr)) {
		// map.pData is not valid when Map fails, so copying the vertices would write through a
		// garbage pointer. Skip the blit instead.
		ERROR_LOG(G3D, "SimpleBlit: failed to map quad vertex buffer: %08x", (unsigned int)hr);
		return;
	}
	memcpy(map.pData, vtx, sizeof(vtx));
	context_->Unmap(quadBuffer_, 0);

	draw_->BindFramebufferAsTexture(src, 0, Draw::FB_COLOR_BIT, 0);
	draw_->BindFramebufferAsRenderTarget(dest);
	Bind2DShader();
	context_->RSSetState(stockD3D11.rasterStateNoCull);
	context_->OMSetBlendState(stockD3D11.blendStateDisabledWithColorMask[0xF], nullptr, 0xFFFFFFFF);
	context_->OMSetDepthStencilState(stockD3D11.depthStencilDisabled, 0);
	context_->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
	context_->PSSetSamplers(0, 1, linearFilter ? &stockD3D11.samplerLinear2DClamp : &stockD3D11.samplerPoint2DClamp);
	UINT stride = sizeof(Vtx);
	UINT offset = 0;
	context_->IASetVertexBuffers(0, 1, &quadBuffer_, &stride, &offset);
	context_->Draw(4, 0);
}

void FramebufferManagerD3D11::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) {
if (!dst->fbo || !src->fbo || !useBufferedRendering_) {
// This can happen if they recently switched from non-buffered.
Expand Down Expand Up @@ -564,21 +612,13 @@ void FramebufferManagerD3D11::BlitFramebuffer(VirtualFramebuffer *dst, int dstX,
Draw::Framebuffer *srcFBO = src->fbo;
if (src == dst) {
Draw::Framebuffer *tempFBO = GetTempFBO(src->renderWidth, src->renderHeight, (Draw::FBColorDepth)src->colorDepth);
bool result = draw_->BlitFramebuffer(
src->fbo, srcX1, srcY1, srcX2, srcY2,
tempFBO, dstX1, dstY1, dstX2, dstY2,
Draw::FB_COLOR_BIT, Draw::FB_BLIT_NEAREST);
if (result) {
srcFBO = tempFBO;
}
SimpleBlit(tempFBO, dstX1, dstY1, dstX2, dstY2, src->fbo, srcX1, srcY1, srcX2, srcY2, false);
srcFBO = tempFBO;
}
bool result = draw_->BlitFramebuffer(
srcFBO, srcX1, srcY1, srcX2, srcY2,
SimpleBlit(
dst->fbo, dstX1, dstY1, dstX2, dstY2,
Draw::FB_COLOR_BIT, Draw::FB_BLIT_NEAREST);
if (!result) {
ERROR_LOG_REPORT(G3D, "fbo_blit_color failed in blit: %08x (%08x -> %08x)", src->fb_address, dst->fb_address);
}
srcFBO, srcX1, srcY1, srcX2, srcY2,
false);
}

// TODO: SSE/NEON
Expand Down
6 changes: 5 additions & 1 deletion GPU/D3D11/FramebufferManagerD3D11.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class FramebufferManagerD3D11 : public FramebufferManagerCommon {

void BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst) override;

void BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, int flags);
void BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags);

void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override;
void DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) override;
Expand Down Expand Up @@ -99,6 +99,10 @@ class FramebufferManagerD3D11 : public FramebufferManagerCommon {
void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override;
void PackFramebufferD3D11_(VirtualFramebuffer *vfb, int x, int y, int w, int h);
void PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h);
void SimpleBlit(
Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2,
Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2,
bool linearFilter);

ID3D11Device *device_;
ID3D11DeviceContext *context_;
Expand Down
Loading

0 comments on commit 3481dae

Please sign in to comment.