Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

D3D11 shader blend and depal #9319

Merged
merged 10 commits into from
Feb 17, 2017
3 changes: 1 addition & 2 deletions GPU/Common/DepalettizeShaderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang
if (language == HLSL_D3D11) {
WRITE(p, "SamplerState texSamp : register(s0);\n");
WRITE(p, "Texture2D<float4> tex : register(t0);\n");
WRITE(p, "SamplerState palSamp : register(s1);\n");
WRITE(p, "Texture2D<float4> pal : register(t1);\n");
} else if (language == GLSL_VULKAN) {
WRITE(p, "#version 140\n");
Expand Down Expand Up @@ -121,7 +120,7 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang
}

if (language == HLSL_D3D11) {
WRITE(p, " return pal.Sample(palSamp, float2((float(index) + 0.5) * (1.0 / %f), 0.0));\n", texturePixels);
WRITE(p, " return pal.Load(int3(index, 0, 0));\n");
} else {
WRITE(p, " fragColor0 = texture(pal, vec2((float(index) + 0.5) * (1.0 / %f), 0.0));\n", texturePixels);
}
Expand Down
34 changes: 33 additions & 1 deletion GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@

#define QUAD_INDICES_MAX 65536

DrawEngineCommon::DrawEngineCommon() : dec_(nullptr), decOptions_{} {
// Constructs the backend-independent part of the draw engine.
// dec_ starts out null, decOptions_ is value-initialized, and both
// shader-blending flags (fboTexNeedBind_, fboTexBound_) start out false.
DrawEngineCommon::DrawEngineCommon()
: dec_(nullptr),
decOptions_{},
fboTexNeedBind_(false),
fboTexBound_(false) {
// Index storage holding 6 * QUAD_INDICES_MAX u16 entries.
quadIndices_ = new u16[6 * QUAD_INDICES_MAX];
// NOTE(review): both allocations use raw new; the matching cleanup is not
// visible in this excerpt - confirm the destructor releases them.
decJitCache_ = new VertexDecoderJitCache();
}
Expand Down Expand Up @@ -462,3 +466,31 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
// Okay, there we are! Return the new type (but keep the index bits)
return GE_VTYPE_TC_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_NRM_FLOAT | GE_VTYPE_POS_FLOAT | (vertType & (GE_VTYPE_IDX_MASK | GE_VTYPE_THROUGH));
}

// Decides whether the current draw may go ahead with shader blending.
//
// Returns true right away when gstate_c.featureFlags advertises any
// framebuffer-fetch support; no bookkeeping happens in that case.
//
// Otherwise every call is counted against the current value of
// gpuStats.numFlips:
//  - when numFlips has changed since the previous call, the finished count is
//    logged (once) if it exceeded MAX_REASONABLE_BLITS_PER_FRAME, and the
//    counter restarts from zero;
//  - once the running count passes twice that limit, a warning is logged
//    (once) and false is returned without touching any state;
//  - in all remaining cases fboTexNeedBind_ is set, DIRTY_SHADERBLEND is
//    marked dirty and true is returned.
bool DrawEngineCommon::ApplyShaderBlending() {
	const bool hasFramebufferFetch = (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) != 0;
	if (hasFramebufferFetch) {
		return true;
	}

	static const int MAX_REASONABLE_BLITS_PER_FRAME = 24;

	// Function-local statics: the tally is shared by every call to this function.
	static int flipOfLastCount = -1;
	static int blitCount = 0;

	const bool flipChanged = flipOfLastCount != gpuStats.numFlips;
	if (flipChanged) {
		// Report on the tally that just ended before starting a new one.
		if (blitCount > MAX_REASONABLE_BLITS_PER_FRAME) {
			WARN_LOG_REPORT_ONCE(blendingBlit, G3D, "Lots of blits needed for obscure blending: %d per frame, blend %d/%d/%d", blitCount, gstate.getBlendFuncA(), gstate.getBlendFuncB(), gstate.getBlendEq());
		}
		flipOfLastCount = gpuStats.numFlips;
		blitCount = 0;
	}

	blitCount += 1;
	const bool overHardLimit = blitCount > MAX_REASONABLE_BLITS_PER_FRAME * 2;
	if (overHardLimit) {
		WARN_LOG_ONCE(blendingBlit2, G3D, "Skipping additional blits needed for obscure blending: %d per frame, blend %d/%d/%d", blitCount, gstate.getBlendFuncA(), gstate.getBlendFuncB(), gstate.getBlendEq());
		return false;
	}

	// Request the framebuffer texture bind and flag the shader-blend state dirty.
	fboTexNeedBind_ = true;
	gstate_c.Dirty(DIRTY_SHADERBLEND);
	return true;
}
5 changes: 5 additions & 0 deletions GPU/Common/DrawEngineCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class DrawEngineCommon {
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType);

void ApplyClearToMemory(int x1, int y1, int x2, int y2, u32 clearColor);
bool ApplyShaderBlending();

VertexDecoder *GetVertexDecoder(u32 vtype);

Expand Down Expand Up @@ -90,6 +91,10 @@ class DrawEngineCommon {
// Fixed index buffer for easy quad generation from spline/bezier
u16 *quadIndices_;

// Shader blending state
bool fboTexNeedBind_;
bool fboTexBound_;

// Hardware tessellation
int numPatches;
class TessellationDataTransfer {
Expand Down
3 changes: 3 additions & 0 deletions GPU/Common/FramebufferCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,9 @@ class FramebufferManagerCommon {
return true;
}

// Read-only accessor: returns the currentRenderVfb_ pointer exactly as stored.
// No null check is performed here, so callers should handle a null result.
VirtualFramebuffer *GetCurrentRenderVFB() const {
return currentRenderVfb_;
}
// TODO: Break out into some form of FBO manager
VirtualFramebuffer *GetVFBAt(u32 addr);
VirtualFramebuffer *GetDisplayVFB() {
Expand Down
8 changes: 0 additions & 8 deletions GPU/D3D11/DepalettizeShaderD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,13 @@ DepalShaderCacheD3D11::DepalShaderCacheD3D11(ID3D11Device *device, ID3D11DeviceC
std::vector<uint8_t> vsByteCode;
vertexShader_ = CreateVertexShaderD3D11(device, depalVShaderHLSL, strlen(depalVShaderHLSL), &vsByteCode);
device_->CreateInputLayout(g_DepalVertexElements, ARRAY_SIZE(g_DepalVertexElements), vsByteCode.data(), vsByteCode.size(), &inputLayout_);

D3D11_SAMPLER_DESC sampDesc{};
sampDesc.AddressU = D3D11_TEXTURE_ADDRESS_WRAP;
sampDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
sampDesc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP;
sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT;
device_->CreateSamplerState(&sampDesc, &clutSampler);
}

DepalShaderCacheD3D11::~DepalShaderCacheD3D11() {
Clear();
if (vertexShader_) {
vertexShader_->Release();
}
clutSampler->Release();
}

u32 DepalShaderCacheD3D11::GenerateShaderID(GEPaletteFormat clutFormat, GEBufferFormat pixelFormat) {
Expand Down
2 changes: 0 additions & 2 deletions GPU/D3D11/DepalettizeShaderD3D11.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ class DepalShaderCacheD3D11 {
ID3D11VertexShader *GetDepalettizeVertexShader() { return vertexShader_; }
ID3D11InputLayout *GetInputLayout() { return inputLayout_; }
ID3D11ShaderResourceView *GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut);
ID3D11SamplerState *GetClutSampler() { return clutSampler; }
void Clear();
void Decimate();

Expand All @@ -62,7 +61,6 @@ class DepalShaderCacheD3D11 {
ID3D11DeviceContext *context_;
ID3D11VertexShader *vertexShader_ = nullptr;
ID3D11InputLayout *inputLayout_ = nullptr;
ID3D11SamplerState *clutSampler = nullptr;

std::map<u32, DepalShaderD3D11 *> cache_;
std::map<u32, DepalTextureD3D11 *> texCache_;
Expand Down
4 changes: 1 addition & 3 deletions GPU/D3D11/DrawEngineD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,7 @@ DrawEngineD3D11::DrawEngineD3D11(Draw::DrawContext *draw, ID3D11Device *device,
numDrawCalls(0),
vertexCountInDrawCalls(0),
decodeCounter_(0),
dcid_(0),
fboTexNeedBind_(false),
fboTexBound_(false) {
dcid_(0) {
decOptions_.expandAllWeightsToFloat = true;
decOptions_.expand8BitNormalsToFloat = true;

Expand Down
7 changes: 3 additions & 4 deletions GPU/D3D11/DrawEngineD3D11.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ class DrawEngineD3D11 : public DrawEngineCommon {

void ApplyDrawState(int prim);
void ApplyDrawStateLate(bool applyStencilRef, uint8_t stencilRef);
void ResetShaderBlending();

ID3D11InputLayout *SetupDecFmtForDraw(D3D11VertexShader *vshader, const DecVtxFormat &decFmt, u32 pspFmt);

Expand Down Expand Up @@ -239,16 +240,14 @@ class DrawEngineD3D11 : public DrawEngineCommon {
std::map<uint32_t, ID3D11RasterizerState *> rasterCache_;

// Keep the depth state between ApplyDrawState and ApplyDrawStateLate
ID3D11RasterizerState *rasterState_;
ID3D11BlendState *blendState_;
ID3D11DepthStencilState *depthStencilState_;

// State keys
D3D11StateKeys keys_{};
D3D11DynamicState dynState_{};

// Initial work on shader blending
bool fboTexNeedBind_;
bool fboTexBound_;

// Hardware tessellation
class TessellationDataTransferD3D11 : public TessellationDataTransfer {
private:
Expand Down
88 changes: 64 additions & 24 deletions GPU/D3D11/FramebufferManagerD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ void FramebufferManagerD3D11::ReformatFramebufferFrom(VirtualFramebuffer *vfb, G
D3D11_VIEWPORT vp{ 0.0f, 0.0f, (float)vfb->renderWidth, (float)vfb->renderHeight, 0.0f, 1.0f };
context_->RSSetViewports(1, &vp);
context_->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
context_->Draw(2, 0);
context_->Draw(4, 0);
}

RebindFramebuffer();
Expand Down Expand Up @@ -398,19 +398,14 @@ void FramebufferManagerD3D11::BlitFramebufferDepth(VirtualFramebuffer *src, Virt
}
bool matchingDepthBuffer = src->z_address == dst->z_address && src->z_stride != 0 && dst->z_stride != 0;
bool matchingSize = src->width == dst->width && src->height == dst->height;
if (matchingDepthBuffer && matchingSize) {
// Doesn't work. Use a shader maybe?
draw_->BindBackbufferAsRenderTarget();
bool matchingRenderSize = src->renderWidth == dst->renderWidth && src->renderHeight == dst->renderHeight;
if (matchingDepthBuffer && matchingSize && matchingRenderSize) {
draw_->CopyFramebufferImage(src->fbo, 0, 0, 0, 0, dst->fbo, 0, 0, 0, 0, src->renderWidth, src->renderHeight, 1, Draw::FB_DEPTH_BIT);
RebindFramebuffer();
}
}

void FramebufferManagerD3D11::BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, int flags) {
if (framebuffer == NULL) {
framebuffer = currentRenderVfb_;
}

void FramebufferManagerD3D11::BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags) {
if (!framebuffer->fbo || !useBufferedRendering_) {
ID3D11ShaderResourceView *view = nullptr;
context_->PSSetShaderResources(stage, 1, &view);
Expand All @@ -425,7 +420,7 @@ void FramebufferManagerD3D11::BindFramebufferColor(int stage, VirtualFramebuffer
skipCopy = true;
}
// Currently rendering to this framebuffer. Need to make a copy.
if (!skipCopy && currentRenderVfb_ && framebuffer->fb_address == gstate.getFrameBufRawAddress()) {
if (!skipCopy && framebuffer == currentRenderVfb_) {
// TODO: Maybe merge with bvfbs_? Not sure if those could be packing, and they're created at a different size.
Draw::Framebuffer *renderCopy = GetTempFBO(framebuffer->renderWidth, framebuffer->renderHeight, (Draw::FBColorDepth)framebuffer->colorDepth);
if (renderCopy) {
Expand Down Expand Up @@ -459,8 +454,14 @@ void FramebufferManagerD3D11::BindFramebufferColor(int stage, VirtualFramebuffer
} else {
draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, 0);
}
} else {
} else if (framebuffer != currentRenderVfb_) {
draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, 0);
} else {
// Badness on D3D11 to bind the currently rendered-to framebuffer as a texture.
ID3D11ShaderResourceView *view = nullptr;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only case where the copy is skipped should be during depal, which ultimately renders to a temp FBO. This doesn't happen in practice, does it? I'm not sure whether currentRenderVfb_ is always reset in that path.

-[Unknown]

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably not; I just wanted to make sure all cases were handled. This case should probably be reported.

context_->PSSetShaderResources(stage, 1, &view);
gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
return;
}
}

Expand Down Expand Up @@ -531,6 +532,53 @@ void FramebufferManagerD3D11::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb)
// Nothing to do here.
}

// Copies the rectangle (srcX1, srcY1)-(srcX2, srcY2) of `src` into the
// rectangle (destX1, destY1)-(destX2, destY2) of `dest`.
//
// When both framebuffers report the same dimensions and the two rectangles
// have the same width and height, the work is handed to
// draw_->CopyFramebufferImage (color only) and nothing is drawn.
//
// Otherwise a four-vertex triangle strip is written to quadBuffer_ and drawn
// with the 2D shader, with `src` bound as texture 0 and `dest` as the render
// target. Positions are the destination coordinates scaled by
// 1 / dest dimensions; texture coordinates are the source coordinates scaled
// by 1 / src dimensions. `linearFilter` chooses between the stock linear and
// point clamp samplers.
void FramebufferManagerD3D11::SimpleBlit(
	Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2,
	Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2, bool linearFilter) {

	int destW, destH, srcW, srcH;
	draw_->GetFramebufferDimensions(src, &srcW, &srcH);
	draw_->GetFramebufferDimensions(dest, &destW, &destH);

	const bool sameFramebufferSize = srcW == destW && srcH == destH;
	const bool sameRectSize = destX2 - destX1 == srcX2 - srcX1 && destY2 - destY1 == srcY2 - srcY1;
	if (sameFramebufferSize && sameRectSize) {
		// No scaling involved, so a plain image copy does the job.
		draw_->CopyFramebufferImage(src, 0, (int)srcX1, (int)srcY1, 0, dest, 0, (int)destX1, (int)destY1, 0, (int)(srcX2 - srcX1), (int)(srcY2 - srcY1), 1, Draw::FB_COLOR_BIT);
		return;
	}

	// Per-pixel scale factors for the destination and source framebuffers.
	const float destScaleX = 1.0f / (float)destW;
	const float destScaleY = 1.0f / (float)destH;
	const float srcScaleX = 1.0f / (float)srcW;
	const float srcScaleY = 1.0f / (float)srcH;

	struct BlitVertex {
		float x, y, z, u, v;
	};
	// Strip order: (X1,Y1), (X2,Y1), (X1,Y2), (X2,Y2).
	BlitVertex quad[4] = {
		{ destScaleX * destX1, destScaleY * destY1, 0.0f, srcScaleX * srcX1, srcScaleY * srcY1 },
		{ destScaleX * destX2, destScaleY * destY1, 0.0f, srcScaleX * srcX2, srcScaleY * srcY1 },
		{ destScaleX * destX1, destScaleY * destY2, 0.0f, srcScaleX * srcX1, srcScaleY * srcY2 },
		{ destScaleX * destX2, destScaleY * destY2, 0.0f, srcScaleX * srcX2, srcScaleY * srcY2 },
	};

	// Upload the quad, discarding the previous contents of quadBuffer_.
	// NOTE(review): the result of Map is not checked - confirm it cannot fail here.
	D3D11_MAPPED_SUBRESOURCE mapped;
	context_->Map(quadBuffer_, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped);
	memcpy(mapped.pData, quad, sizeof(quad));
	context_->Unmap(quadBuffer_, 0);

	draw_->BindFramebufferAsTexture(src, 0, Draw::FB_COLOR_BIT, 0);
	draw_->BindFramebufferAsRenderTarget(dest);
	Bind2DShader();

	// Fixed-function state: no culling, blending off with all channels
	// writable, depth/stencil off.
	context_->RSSetState(stockD3D11.rasterStateNoCull);
	context_->OMSetBlendState(stockD3D11.blendStateDisabledWithColorMask[0xF], nullptr, 0xFFFFFFFF);
	context_->OMSetDepthStencilState(stockD3D11.depthStencilDisabled, 0);
	context_->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

	auto sampler = linearFilter ? &stockD3D11.samplerLinear2DClamp : &stockD3D11.samplerPoint2DClamp;
	context_->PSSetSamplers(0, 1, sampler);

	UINT vertexStride = sizeof(BlitVertex);
	UINT vertexOffset = 0;
	context_->IASetVertexBuffers(0, 1, &quadBuffer_, &vertexStride, &vertexOffset);
	context_->Draw(4, 0);
}

void FramebufferManagerD3D11::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) {
if (!dst->fbo || !src->fbo || !useBufferedRendering_) {
// This can happen if they recently switched from non-buffered.
Expand Down Expand Up @@ -564,21 +612,13 @@ void FramebufferManagerD3D11::BlitFramebuffer(VirtualFramebuffer *dst, int dstX,
Draw::Framebuffer *srcFBO = src->fbo;
if (src == dst) {
Draw::Framebuffer *tempFBO = GetTempFBO(src->renderWidth, src->renderHeight, (Draw::FBColorDepth)src->colorDepth);
bool result = draw_->BlitFramebuffer(
src->fbo, srcX1, srcY1, srcX2, srcY2,
tempFBO, dstX1, dstY1, dstX2, dstY2,
Draw::FB_COLOR_BIT, Draw::FB_BLIT_NEAREST);
if (result) {
srcFBO = tempFBO;
}
SimpleBlit(tempFBO, dstX1, dstY1, dstX2, dstY2, src->fbo, srcX1, srcY1, srcX2, srcY2, false);
srcFBO = tempFBO;
}
bool result = draw_->BlitFramebuffer(
srcFBO, srcX1, srcY1, srcX2, srcY2,
SimpleBlit(
dst->fbo, dstX1, dstY1, dstX2, dstY2,
Draw::FB_COLOR_BIT, Draw::FB_BLIT_NEAREST);
if (!result) {
ERROR_LOG_REPORT(G3D, "fbo_blit_color failed in blit: %08x (%08x -> %08x)", src->fb_address, dst->fb_address);
}
srcFBO, srcX1, srcY1, srcX2, srcY2,
false);
}

// TODO: SSE/NEON
Expand Down
6 changes: 5 additions & 1 deletion GPU/D3D11/FramebufferManagerD3D11.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class FramebufferManagerD3D11 : public FramebufferManagerCommon {

void BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst) override;

void BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, int flags);
void BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags);

void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override;
void DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) override;
Expand Down Expand Up @@ -99,6 +99,10 @@ class FramebufferManagerD3D11 : public FramebufferManagerCommon {
void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override;
void PackFramebufferD3D11_(VirtualFramebuffer *vfb, int x, int y, int w, int h);
void PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h);
void SimpleBlit(
Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2,
Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2,
bool linearFilter);

ID3D11Device *device_;
ID3D11DeviceContext *context_;
Expand Down
Loading