Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

D3D11 shader blend and depal #9319

Merged
merged 10 commits into from
Feb 17, 2017
34 changes: 33 additions & 1 deletion GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@

#define QUAD_INDICES_MAX 65536

DrawEngineCommon::DrawEngineCommon() : dec_(nullptr), decOptions_{} {
// Constructs the shared draw-engine state: no vertex decoder selected (dec_ is null),
// value-initialized decoder options, and both shader-blending flags cleared.
DrawEngineCommon::DrawEngineCommon()
: dec_(nullptr),
decOptions_{},
fboTexNeedBind_(false),
fboTexBound_(false) {
// Raw allocation holding 6 * QUAD_INDICES_MAX u16 indices.
quadIndices_ = new u16[6 * QUAD_INDICES_MAX];
// NOTE(review): both allocations are raw owning pointers; presumably released in the
// destructor, which is not visible here - confirm.
decJitCache_ = new VertexDecoderJitCache();
}
Expand Down Expand Up @@ -462,3 +466,31 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
// Okay, there we are! Return the new type (but keep the index bits)
return GE_VTYPE_TC_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_NRM_FLOAT | GE_VTYPE_POS_FLOAT | (vertType & (GE_VTYPE_IDX_MASK | GE_VTYPE_THROUGH));
}

// Decides whether the current draw may blend in the shader.
//
// Returns true immediately when the GPU advertises any form of framebuffer fetch.
// Otherwise each call is counted against a per-frame budget (a "frame" changes whenever
// gpuStats.numFlips changes): once more than twice MAX_REASONABLE_BLITS_PER_FRAME calls
// have been made in one frame the function returns false; below that it flags the
// framebuffer texture for binding, dirties DIRTY_SHADERBLEND and returns true.
//
// The counters are function-local statics, so they are shared by every caller.
bool DrawEngineCommon::ApplyShaderBlending() {
	const bool hasFramebufferFetch = (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) != 0;
	if (hasFramebufferFetch) {
		return true;
	}

	static const int MAX_REASONABLE_BLITS_PER_FRAME = 24;
	// Above this many requests in a single frame we stop honoring them.
	static const int BLIT_SKIP_THRESHOLD = MAX_REASONABLE_BLITS_PER_FRAME * 2;

	static int lastFrameBlit = -1;
	static int blitsThisFrame = 0;

	const int currentFlip = gpuStats.numFlips;
	if (currentFlip != lastFrameBlit) {
		// A new frame started: report the previous frame if it was excessive, then reset.
		const bool previousFrameWasHeavy = blitsThisFrame > MAX_REASONABLE_BLITS_PER_FRAME;
		if (previousFrameWasHeavy) {
			WARN_LOG_REPORT_ONCE(blendingBlit, G3D, "Lots of blits needed for obscure blending: %d per frame, blend %d/%d/%d", blitsThisFrame, gstate.getBlendFuncA(), gstate.getBlendFuncB(), gstate.getBlendEq());
		}
		lastFrameBlit = currentFlip;
		blitsThisFrame = 0;
	}

	if (++blitsThisFrame > BLIT_SKIP_THRESHOLD) {
		WARN_LOG_ONCE(blendingBlit2, G3D, "Skipping additional blits needed for obscure blending: %d per frame, blend %d/%d/%d", blitsThisFrame, gstate.getBlendFuncA(), gstate.getBlendFuncB(), gstate.getBlendEq());
		return false;
	}

	// Within budget: request the framebuffer texture bind and mark the blend state dirty.
	fboTexNeedBind_ = true;
	gstate_c.Dirty(DIRTY_SHADERBLEND);
	return true;
}
5 changes: 5 additions & 0 deletions GPU/Common/DrawEngineCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class DrawEngineCommon {
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType);

void ApplyClearToMemory(int x1, int y1, int x2, int y2, u32 clearColor);
bool ApplyShaderBlending();

VertexDecoder *GetVertexDecoder(u32 vtype);

Expand Down Expand Up @@ -90,6 +91,10 @@ class DrawEngineCommon {
// Fixed index buffer for easy quad generation from spline/bezier
u16 *quadIndices_;

// Shader blending state
bool fboTexNeedBind_;
bool fboTexBound_;

// Hardware tessellation
int numPatches;
class TessellationDataTransfer {
Expand Down
3 changes: 3 additions & 0 deletions GPU/Common/FramebufferCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,9 @@ class FramebufferManagerCommon {
return true;
}

// Read-only accessor for currentRenderVfb_ (presumably the framebuffer currently being
// rendered to - confirm). NOTE(review): may be null; callers should check before use.
VirtualFramebuffer *GetCurrentRenderVFB() const {
return currentRenderVfb_;
}
// TODO: Break out into some form of FBO manager
VirtualFramebuffer *GetVFBAt(u32 addr);
VirtualFramebuffer *GetDisplayVFB() {
Expand Down
4 changes: 1 addition & 3 deletions GPU/D3D11/DrawEngineD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,7 @@ DrawEngineD3D11::DrawEngineD3D11(Draw::DrawContext *draw, ID3D11Device *device,
numDrawCalls(0),
vertexCountInDrawCalls(0),
decodeCounter_(0),
dcid_(0),
fboTexNeedBind_(false),
fboTexBound_(false) {
dcid_(0) {
decOptions_.expandAllWeightsToFloat = true;
decOptions_.expand8BitNormalsToFloat = true;

Expand Down
5 changes: 1 addition & 4 deletions GPU/D3D11/DrawEngineD3D11.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ class DrawEngineD3D11 : public DrawEngineCommon {

void ApplyDrawState(int prim);
void ApplyDrawStateLate(bool applyStencilRef, uint8_t stencilRef);
void ResetShaderBlending();

ID3D11InputLayout *SetupDecFmtForDraw(D3D11VertexShader *vshader, const DecVtxFormat &decFmt, u32 pspFmt);

Expand Down Expand Up @@ -245,10 +246,6 @@ class DrawEngineD3D11 : public DrawEngineCommon {
D3D11StateKeys keys_{};
D3D11DynamicState dynState_{};

// Initial work on shader blending
bool fboTexNeedBind_;
bool fboTexBound_;

// Hardware tessellation
class TessellationDataTransferD3D11 : public TessellationDataTransfer {
private:
Expand Down
86 changes: 63 additions & 23 deletions GPU/D3D11/FramebufferManagerD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -398,19 +398,14 @@ void FramebufferManagerD3D11::BlitFramebufferDepth(VirtualFramebuffer *src, Virt
}
bool matchingDepthBuffer = src->z_address == dst->z_address && src->z_stride != 0 && dst->z_stride != 0;
bool matchingSize = src->width == dst->width && src->height == dst->height;
if (matchingDepthBuffer && matchingSize) {
// Doesn't work. Use a shader maybe?
draw_->BindBackbufferAsRenderTarget();
bool matchingRenderSize = src->renderWidth == dst->renderWidth && src->renderHeight == dst->renderHeight;
if (matchingDepthBuffer && matchingSize && matchingRenderSize) {
draw_->CopyFramebufferImage(src->fbo, 0, 0, 0, 0, dst->fbo, 0, 0, 0, 0, src->renderWidth, src->renderHeight, 1, Draw::FB_DEPTH_BIT);
RebindFramebuffer();
}
}

void FramebufferManagerD3D11::BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, int flags) {
if (framebuffer == NULL) {
framebuffer = currentRenderVfb_;
}

void FramebufferManagerD3D11::BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags) {
if (!framebuffer->fbo || !useBufferedRendering_) {
ID3D11ShaderResourceView *view = nullptr;
context_->PSSetShaderResources(stage, 1, &view);
Expand All @@ -425,7 +420,7 @@ void FramebufferManagerD3D11::BindFramebufferColor(int stage, VirtualFramebuffer
skipCopy = true;
}
// Currently rendering to this framebuffer. Need to make a copy.
if (!skipCopy && currentRenderVfb_ && framebuffer->fb_address == gstate.getFrameBufRawAddress()) {
if (!skipCopy && framebuffer == currentRenderVfb_) {
// TODO: Maybe merge with bvfbs_? Not sure if those could be packing, and they're created at a different size.
Draw::Framebuffer *renderCopy = GetTempFBO(framebuffer->renderWidth, framebuffer->renderHeight, (Draw::FBColorDepth)framebuffer->colorDepth);
if (renderCopy) {
Expand Down Expand Up @@ -459,8 +454,14 @@ void FramebufferManagerD3D11::BindFramebufferColor(int stage, VirtualFramebuffer
} else {
draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, 0);
}
} else {
} else if (framebuffer != currentRenderVfb_) {
draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, 0);
} else {
// Badness on D3D11 to bind the currently rendered-to framebuffer as a texture.
ID3D11ShaderResourceView *view = nullptr;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only skip-copy case should be during depal, which ultimately renders to a temp FBO. This doesn't happen in practice, does it? I'm not sure whether currentRenderVfb_ is always reset in that path.

-[Unknown]

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably not; I just wanted to make sure all cases were handled. This case should probably be reported.

context_->PSSetShaderResources(stage, 1, &view);
gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
return;
}
}

Expand Down Expand Up @@ -531,6 +532,53 @@ void FramebufferManagerD3D11::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb)
// Nothing to do here.
}

// Copies or stretches a rectangle of src's color buffer into a rectangle of dest.
//
// dest / (destX1, destY1)-(destX2, destY2): target framebuffer and rectangle, in its pixels.
// src  / (srcX1, srcY1)-(srcX2, srcY2):     source framebuffer and rectangle, in its pixels.
// linearFilter: use the linear clamp sampler instead of the point clamp sampler when the
//               blit has to be drawn.
//
// When both framebuffers have the same dimensions and the rectangles have the same size,
// the blit is done as a plain image copy. Otherwise a textured quad is drawn into dest,
// which leaves dest bound as the render target and overwrites the raster, blend,
// depth/stencil, topology, sampler 0 and vertex buffer 0 state.
void FramebufferManagerD3D11::SimpleBlit(
	Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2,
	Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2, bool linearFilter) {

	int destW, destH, srcW, srcH;
	draw_->GetFramebufferDimensions(src, &srcW, &srcH);
	draw_->GetFramebufferDimensions(dest, &destW, &destH);

	if (srcW == destW && srcH == destH && destX2 - destX1 == srcX2 - srcX1 && destY2 - destY1 == srcY2 - srcY1) {
		// Optimize to a copy
		draw_->CopyFramebufferImage(src, 0, (int)srcX1, (int)srcY1, 0, dest, 0, (int)destX1, (int)destY1, 0, (int)(srcX2 - srcX1), (int)(srcY2 - srcY1), 1, Draw::FB_COLOR_BIT);
		return;
	}

	// Scale factors turning pixel coordinates into 0..1 fractions of each framebuffer.
	float dX = 1.0f / (float)destW;
	float dY = 1.0f / (float)destH;
	float sX = 1.0f / (float)srcW;
	float sY = 1.0f / (float)srcH;
	struct Vtx {
		float x, y, z, u, v;
	};
	// NOTE(review): positions are emitted as 0..1 fractions of the destination, not as
	// -1..1 clip-space values, and no viewport is set here. Confirm that the shader bound
	// by Bind2DShader() and the active viewport account for this.
	Vtx vtx[4] = {
		{ dX * destX1, dY * destY1, 0.0f, sX * srcX1, sY * srcY1 },
		{ dX * destX2, dY * destY1, 0.0f, sX * srcX2, sY * srcY1 },
		{ dX * destX1, dY * destY2, 0.0f, sX * srcX1, sY * srcY2 },
		{ dX * destX2, dY * destY2, 0.0f, sX * srcX2, sY * srcY2 },
	};

	// Map can fail (e.g. device removed); never write through an unmapped pointer.
	// Bail out before any pipeline state has been touched.
	D3D11_MAPPED_SUBRESOURCE map{};
	const HRESULT hr = context_->Map(quadBuffer_, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
	if (FAILED(hr) || !map.pData) {
		ERROR_LOG_REPORT(G3D, "SimpleBlit: failed to map quad vertex buffer: %08x", static_cast<unsigned int>(hr));
		return;
	}
	memcpy(map.pData, vtx, sizeof(vtx));
	context_->Unmap(quadBuffer_, 0);

	draw_->BindFramebufferAsTexture(src, 0, Draw::FB_COLOR_BIT, 0);
	draw_->BindFramebufferAsRenderTarget(dest);
	Bind2DShader();
	context_->RSSetState(stockD3D11.rasterStateNoCull);
	context_->OMSetBlendState(stockD3D11.blendStateDisabledWithColorMask[0xF], nullptr, 0xFFFFFFFF);
	context_->OMSetDepthStencilState(stockD3D11.depthStencilDisabled, 0);
	context_->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
	context_->PSSetSamplers(0, 1, linearFilter ? &stockD3D11.samplerLinear2DClamp : &stockD3D11.samplerPoint2DClamp);
	UINT stride = sizeof(Vtx);
	UINT offset = 0;
	context_->IASetVertexBuffers(0, 1, &quadBuffer_, &stride, &offset);
	context_->Draw(4, 0);
}

void FramebufferManagerD3D11::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) {
if (!dst->fbo || !src->fbo || !useBufferedRendering_) {
// This can happen if they recently switched from non-buffered.
Expand Down Expand Up @@ -564,21 +612,13 @@ void FramebufferManagerD3D11::BlitFramebuffer(VirtualFramebuffer *dst, int dstX,
Draw::Framebuffer *srcFBO = src->fbo;
if (src == dst) {
Draw::Framebuffer *tempFBO = GetTempFBO(src->renderWidth, src->renderHeight, (Draw::FBColorDepth)src->colorDepth);
bool result = draw_->BlitFramebuffer(
src->fbo, srcX1, srcY1, srcX2, srcY2,
tempFBO, dstX1, dstY1, dstX2, dstY2,
Draw::FB_COLOR_BIT, Draw::FB_BLIT_NEAREST);
if (result) {
srcFBO = tempFBO;
}
SimpleBlit(tempFBO, dstX1, dstY1, dstX2, dstY2, src->fbo, srcX1, srcY1, srcX2, srcY2, false);
srcFBO = tempFBO;
}
bool result = draw_->BlitFramebuffer(
srcFBO, srcX1, srcY1, srcX2, srcY2,
SimpleBlit(
dst->fbo, dstX1, dstY1, dstX2, dstY2,
Draw::FB_COLOR_BIT, Draw::FB_BLIT_NEAREST);
if (!result) {
ERROR_LOG_REPORT(G3D, "fbo_blit_color failed in blit: %08x (%08x -> %08x)", src->fb_address, dst->fb_address);
}
srcFBO, srcX1, srcY1, srcX2, srcY2,
false);
}

// TODO: SSE/NEON
Expand Down
6 changes: 5 additions & 1 deletion GPU/D3D11/FramebufferManagerD3D11.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class FramebufferManagerD3D11 : public FramebufferManagerCommon {

void BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst) override;

void BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, int flags);
void BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags);

void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override;
void DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) override;
Expand Down Expand Up @@ -99,6 +99,10 @@ class FramebufferManagerD3D11 : public FramebufferManagerCommon {
void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override;
void PackFramebufferD3D11_(VirtualFramebuffer *vfb, int x, int y, int w, int h);
void PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h);
void SimpleBlit(
Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2,
Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2,
bool linearFilter);

ID3D11Device *device_;
ID3D11DeviceContext *context_;
Expand Down
29 changes: 18 additions & 11 deletions GPU/D3D11/StateMappingD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,12 @@ static const D3D11_LOGIC_OP logicOps[] = {
};
*/

static bool ApplyShaderBlending() {
return false;
}

static void ResetShaderBlending() {
//
// Unbinds the framebuffer copy used for shader blending, if one is currently bound.
//
// ApplyDrawStateLate binds that copy at pixel-shader resource stage 1, so stage 1 is the
// slot that must be cleared here. Clearing stage 0 instead would leave the framebuffer
// copy bound and drop whatever texture is bound at stage 0.
void DrawEngineD3D11::ResetShaderBlending() {
	if (fboTexBound_) {
		ID3D11ShaderResourceView *srv = nullptr;
		context_->PSSetShaderResources(1, 1, &srv);
		fboTexBound_ = false;
	}
}

class FramebufferManagerD3D11;
Expand All @@ -136,8 +136,7 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
memset(&keys_, 0, sizeof(keys_));
memset(&dynState_, 0, sizeof(dynState_));

// Unfortunately, this isn't implemented yet.
gstate_c.allowShaderBlend = false;
gstate_c.allowShaderBlend = !g_Config.bDisableSlowFramebufEffects;

// Set blend - unless we need to do it in the shader.
GenericBlendState blendState;
Expand All @@ -155,8 +154,7 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
ResetShaderBlending();
gstate_c.allowShaderBlend = false;
}
}
else if (blendState.resetShaderBlending) {
} else if (blendState.resetShaderBlending) {
ResetShaderBlending();
}

Expand Down Expand Up @@ -421,6 +419,15 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
}

void DrawEngineD3D11::ApplyDrawStateLate(bool applyStencilRef, uint8_t stencilRef) {
textureCache_->ApplyTexture();
if (!gstate.isModeClear()) {
if (fboTexNeedBind_) {
framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
// No sampler required, we do a Load in the pixel shader
// context_->PSSetSamplers(1, 1, &stockD3D11.samplerPoint2DClamp);
fboTexBound_ = true;
fboTexNeedBind_ = false;
}
textureCache_->ApplyTexture();
}
context_->OMSetDepthStencilState(depthStencilState_, applyStencilRef ? stencilRef : dynState_.stencilRef);
}
12 changes: 7 additions & 5 deletions GPU/D3D11/TextureCacheD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ void TextureCacheD3D11::DeleteTexture(TexCache::iterator it) {
void TextureCacheD3D11::ForgetLastTexture() {
lastBoundTexture = INVALID_TEX;
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
ID3D11ShaderResourceView *nullTex = nullptr;
context_->PSSetShaderResources(0, 1, &nullTex);
}

// Removes old textures.
Expand Down Expand Up @@ -534,7 +536,6 @@ void TextureCacheD3D11::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFra
ID3D11ShaderResourceView *clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBuf_);

Draw::Framebuffer *depalFBO = framebufferManagerD3D11_->GetTempFBO(framebuffer->renderWidth, framebuffer->renderHeight, Draw::FBO_8888);
draw_->BindFramebufferAsRenderTarget(depalFBO);
shaderManager_->DirtyLastShader();
draw_->BindPipeline(nullptr);

Expand All @@ -547,10 +548,12 @@ void TextureCacheD3D11::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFra

context_->PSSetShaderResources(1, 1, &clutTexture);
context_->PSSetSamplers(1, 1, &stockD3D11.samplerPoint2DWrap);
framebufferManagerD3D11_->BindFramebufferColor(0, framebuffer, BINDFBCOLOR_SKIP_COPY);
framebufferManagerD3D11_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_SKIP_COPY);
context_->PSSetSamplers(0, 1, &stockD3D11.samplerPoint2DWrap);
draw_->BindFramebufferAsRenderTarget(depalFBO);
shaderApply.Shade();

framebufferManagerD3D11_->RebindFramebuffer();
draw_->BindFramebufferAsTexture(depalFBO, 0, Draw::FB_COLOR_BIT, 0);

const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
Expand All @@ -562,13 +565,12 @@ void TextureCacheD3D11::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFra
} else {
entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE;

framebufferManagerD3D11_->BindFramebufferColor(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
framebufferManagerD3D11_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);

gstate_c.textureFullAlpha = gstate.getTextureFormat() == GE_TFMT_5650;
gstate_c.textureSimpleAlpha = gstate_c.textureFullAlpha;
framebufferManagerD3D11_->RebindFramebuffer();
}

framebufferManagerD3D11_->RebindFramebuffer();
SamplerCacheKey samplerKey;
SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight, samplerKey);
ID3D11SamplerState *state = samplerCache_.GetOrCreateSampler(device_, samplerKey);
Expand Down
4 changes: 1 addition & 3 deletions GPU/Directx9/DrawEngineDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,7 @@ DrawEngineDX9::DrawEngineDX9(LPDIRECT3DDEVICE9 device)
numDrawCalls(0),
vertexCountInDrawCalls(0),
decodeCounter_(0),
dcid_(0),
fboTexNeedBind_(false),
fboTexBound_(false) {
dcid_(0) {
decOptions_.expandAllWeightsToFloat = true;
decOptions_.expand8BitNormalsToFloat = true;

Expand Down
4 changes: 0 additions & 4 deletions GPU/Directx9/DrawEngineDX9.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,6 @@ class DrawEngineDX9 : public DrawEngineCommon {

void ApplyDrawState(int prim);
void ApplyDrawStateLate();
bool ApplyShaderBlending();
void ResetShaderBlending();

IDirect3DVertexDeclaration9 *SetupDecFmtForDraw(VSShader *vshader, const DecVtxFormat &decFmt, u32 pspFmt);
Expand Down Expand Up @@ -219,9 +218,6 @@ class DrawEngineDX9 : public DrawEngineCommon {

UVScale uvScale[MAX_DEFERRED_DRAW_CALLS];

bool fboTexNeedBind_;
bool fboTexBound_;

// Hardware tessellation
class TessellationDataTransferDX9 : public TessellationDataTransfer {
private:
Expand Down
2 changes: 1 addition & 1 deletion GPU/Directx9/FramebufferDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ static void DXSetViewport(float x, float y, float w, float h, float minZ, float
return offscreen;
}

void FramebufferManagerDX9::BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, int flags) {
void FramebufferManagerDX9::BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags) {
if (framebuffer == NULL) {
framebuffer = currentRenderVfb_;
}
Expand Down
Loading