Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Apply unsupported blending modes in the shader #6070

Merged
merged 11 commits into from
May 27, 2014
157 changes: 148 additions & 9 deletions GPU/GLES/FragmentShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ ReplaceAlphaType ReplaceAlphaWithStencil() {
if (gstate.isAlphaBlendEnabled()) {
if (nonAlphaSrcFactors[gstate.getBlendFuncA()] && nonAlphaDestFactors[gstate.getBlendFuncB()]) {
return REPLACE_ALPHA_YES;
} else if (ShouldUseShaderBlending()) {
return REPLACE_ALPHA_YES;
} else {
if (gl_extensions.ARB_blend_func_extended) {
return REPLACE_ALPHA_DUALSOURCE;
Expand Down Expand Up @@ -277,10 +279,69 @@ static bool CanDoubleSrcBlendMode() {
}
}

// Decides whether the current blend state should be applied inside the fragment shader
// instead of being left to regular blending.
//
// Returns false when alpha blending is disabled, or when neither OpenGL 3.0+ nor GLES3 is
// available. Returns true for GE_BLENDMODE_ABSDIFF, for any of the doubled-alpha source or
// destination factors, and for FIXA + FIXB pairs that pass the constant checks below.
// Everything else returns false.
// TODO: Setting to disable?
bool ShouldUseShaderBlending() {
if (!gstate.isAlphaBlendEnabled()) {
return false;
}
// We can't blit on GLES2, so we don't support it. We also want texelFetch (OpenGL 3.0+ / GLES3+.)
if (!gl_extensions.VersionGEThan(3, 0, 0) && !gl_extensions.GLES3) {
return false;
}

GEBlendSrcFactor funcA = gstate.getBlendFuncA();
GEBlendDstFactor funcB = gstate.getBlendFuncB();
GEBlendMode eq = gstate.getBlendEq();

// ABSDIFF is checked first, so it is not affected by the doubling shortcut below.
if (eq == GE_BLENDMODE_ABSDIFF) {
return true;
}

// This normally involves a blit, so try to skip it.
if (AlphaToColorDoubling() || CanDoubleSrcBlendMode()) {
return false;
}

// Source factor: the doubled-alpha factors always request shader blending.
switch (funcA) {
case GE_SRCBLEND_DOUBLESRCALPHA:
case GE_SRCBLEND_DOUBLEINVSRCALPHA:
case GE_SRCBLEND_DOUBLEDSTALPHA:
case GE_SRCBLEND_DOUBLEINVDSTALPHA:
return true;

case GE_SRCBLEND_FIXA:
// Only the FIXA + FIXB combination is examined; FIXA with any other dest factor falls out.
if (funcB == GE_DSTBLEND_FIXB) {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here you could also filter out the cases where one of the fix colors is 0xFFFFFF or 0x0, as those are already handled correctly by regular blending by replacing them with GL_ONE/GL_ZERO.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just did it, yeah.

-[Unknown]

u32 fixA = gstate.getFixA();
u32 fixB = gstate.getFixB();
// OpenGL only supports one constant color, so check if we could be more exact.
// Shader blending is requested only when the two constants differ, fixA is not
// 0xFFFFFF - fixB, and neither constant is 0 or 0xFFFFFF.
if (fixA != fixB && fixA != 0xFFFFFF - fixB && fixA != 0 && fixB != 0 && fixA != 0xFFFFFF && fixB != 0xFFFFFF) {
return true;
}
}
// No break here: a FIXA case that did not return falls into default, which only breaks.

default:
break;
}

// Destination factor: the doubled-alpha factors also request shader blending.
switch (funcB) {
case GE_DSTBLEND_DOUBLESRCALPHA:
case GE_DSTBLEND_DOUBLEINVSRCALPHA:
case GE_DSTBLEND_DOUBLEDSTALPHA:
case GE_DSTBLEND_DOUBLEINVDSTALPHA:
return true;

default:
break;
}

return false;
}

// Here we must take all the bits of the gstate that determine what the fragment shader will
// look like, and concatenate them together into an ID.
void ComputeFragmentShaderID(FragmentShaderID *id) {
int id0 = 0;
int id1 = 0;
if (gstate.isModeClear()) {
// We only need one clear shader, so let's ignore the rest of the bits.
id0 = 1;
Expand All @@ -296,7 +357,6 @@ void ComputeFragmentShaderID(FragmentShaderID *id) {
bool enableAlphaDoubling = !alphaToColorDoubling && CanDoubleSrcBlendMode();
bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX;
bool doTextureAlpha = gstate.isTextureAlphaUsed();
bool computeAbsdiff = gstate.getBlendEq() == GE_BLENDMODE_ABSDIFF;
ReplaceAlphaType stencilToAlpha = ReplaceAlphaWithStencil();

// All texfuncs except replace are the same for RGB as for RGBA with full alpha.
Expand Down Expand Up @@ -338,12 +398,17 @@ void ComputeFragmentShaderID(FragmentShaderID *id) {
else
gpuStats.numNonAlphaTestedDraws++;

if (computeAbsdiff) {
id0 |= (computeAbsdiff & 1) << 25;
if (ShouldUseShaderBlending()) {
// 12 bits total.
id1 |= 1;
id1 |= (gstate.getBlendEq() << 1);
id1 |= (gstate.getBlendFuncA() << 4);
id1 |= (gstate.getBlendFuncB() << 8);
}
}

id->d[0] = id0;
id->d[1] = id1;
}

// Missing: Z depth range
Expand Down Expand Up @@ -423,14 +488,24 @@ void GenerateFragmentShader(char *buffer) {
bool enableAlphaDoubling = !alphaToColorDoubling && CanDoubleSrcBlendMode();
bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX;
bool doTextureAlpha = gstate.isTextureAlphaUsed();
bool computeAbsdiff = gstate.getBlendEq() == GE_BLENDMODE_ABSDIFF;
ReplaceAlphaType stencilToAlpha = ReplaceAlphaWithStencil();

if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE)
doTextureAlpha = false;

if (doTexture)
WRITE(p, "uniform sampler2D tex;\n");
if (ShouldUseShaderBlending() && !gstate.isModeClear()) {
if (!gl_extensions.NV_shader_framebuffer_fetch) {
WRITE(p, "uniform sampler2D fbotex;\n");
}
if (gstate.getBlendFuncA() == GE_SRCBLEND_FIXA) {
WRITE(p, "uniform vec3 u_blendFixA;\n");
}
if (gstate.getBlendFuncB() == GE_DSTBLEND_FIXB) {
WRITE(p, "uniform vec3 u_blendFixB;\n");
}
}

if (enableAlphaTest || enableColorTest) {
WRITE(p, "uniform vec4 u_alphacolorref;\n");
Expand Down Expand Up @@ -607,12 +682,76 @@ void GenerateFragmentShader(char *buffer) {
WRITE(p, " v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
// WRITE(p, " v.x = v_depth;\n");
}
}

// Handle ABSDIFF blending mode using NV_shader_framebuffer_fetch
if (computeAbsdiff && gl_extensions.NV_shader_framebuffer_fetch) {
WRITE(p, " lowp vec4 destColor = gl_LastFragData[0];\n");
WRITE(p, " gl_FragColor = abs(destColor - v);\n");
if (ShouldUseShaderBlending()) {
// If we have NV_shader_framebuffer_fetch / EXT_shader_framebuffer_fetch, we skip the blit.
// We can just read the prev value more directly.
// TODO: EXT_shader_framebuffer_fetch on iOS 6, possibly others.
if (gl_extensions.NV_shader_framebuffer_fetch) {
WRITE(p, " lowp vec4 destColor = gl_LastFragData[0];\n");
} else {
WRITE(p, " lowp vec4 destColor = texelFetch(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n");
}

GEBlendSrcFactor funcA = gstate.getBlendFuncA();
GEBlendDstFactor funcB = gstate.getBlendFuncB();
GEBlendMode eq = gstate.getBlendEq();

const char *srcFactor = "vec3(1.0)";
const char *dstFactor = "vec3(0.0)";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or should this be vec3(1.0)? It probably doesn't matter, since it will get overwritten below.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My intent is that if I messed up or if some blending func > 10 is used, it is treated as no blending.

-[Unknown]

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see.


switch (funcA)
{
case GE_SRCBLEND_DSTCOLOR: srcFactor = "destColor.rgb"; break;
case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "(vec3(1.0) - destColor.rgb)"; break;
case GE_SRCBLEND_SRCALPHA: srcFactor = "vec3(v.a)"; break;
case GE_SRCBLEND_INVSRCALPHA: srcFactor = "vec3(1.0 - v.a)"; break;
case GE_SRCBLEND_DSTALPHA: srcFactor = "vec3(destColor.a)"; break;
case GE_SRCBLEND_INVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a)"; break;
case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "vec3(v.a * 2.0)"; break;
// TODO: Double inverse, or inverse double? Following softgpu for now...
case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "vec3(destColor.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a * 2.0)"; break;
case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break;
}
switch (funcB)
{
case GE_DSTBLEND_SRCCOLOR: dstFactor = "v.rgb"; break;
case GE_DSTBLEND_INVSRCCOLOR: dstFactor = "(vec3(1.0) - v.rgb)"; break;
case GE_DSTBLEND_SRCALPHA: dstFactor = "vec3(v.a)"; break;
case GE_DSTBLEND_INVSRCALPHA: dstFactor = "vec3(1.0 - v.a)"; break;
case GE_DSTBLEND_DSTALPHA: dstFactor = "vec3(destColor.a)"; break;
case GE_DSTBLEND_INVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a)"; break;
case GE_DSTBLEND_DOUBLESRCALPHA: dstFactor = "vec3(v.a * 2.0)"; break;
case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "vec3(1.0 - v.a * 2.0)"; break;
case GE_DSTBLEND_DOUBLEDSTALPHA: dstFactor = "vec3(destColor.a * 2.0)"; break;
case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a * 2.0)"; break;
case GE_DSTBLEND_FIXB: dstFactor = "u_blendFixB"; break;
}

switch (eq)
{
case GE_BLENDMODE_MUL_AND_ADD:
WRITE(p, " v.rgb = v.rgb * %s + destColor.rgb * %s;\n", srcFactor, dstFactor);
break;
case GE_BLENDMODE_MUL_AND_SUBTRACT:
WRITE(p, " v.rgb = v.rgb * %s - destColor.rgb * %s;\n", srcFactor, dstFactor);
break;
case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
WRITE(p, " v.rgb = destColor.rgb * %s - v.rgb * %s;\n", srcFactor, dstFactor);
break;
case GE_BLENDMODE_MIN:
WRITE(p, " v.rgb = min(v.rgb, destColor.rgb);\n");
break;
case GE_BLENDMODE_MAX:
WRITE(p, " v.rgb = max(v.rgb, destColor.rgb);\n");
break;
case GE_BLENDMODE_ABSDIFF:
WRITE(p, " v.rgb = abs(v.rgb - destColor.rgb);\n");
break;
}
}
}

switch (stencilToAlpha) {
Expand Down
8 changes: 4 additions & 4 deletions GPU/GLES/FragmentShaderGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
#include "Globals.h"

struct FragmentShaderID {
FragmentShaderID() {d[0] = 0xFFFFFFFF;}
void clear() {d[0] = 0xFFFFFFFF;}
u32 d[1];
FragmentShaderID() {clear();}
void clear() {d[0] = 0xFFFFFFFF; d[1] = 0xFFFFFFFF;}
u32 d[2];
bool operator < (const FragmentShaderID &other) const {
for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++) {
if (d[i] < other.d[i])
Expand Down Expand Up @@ -62,4 +62,4 @@ bool IsAlphaTestTriviallyTrue();
bool IsColorTestTriviallyTrue();
StencilValueType ReplaceAlphaWithStencilType();
ReplaceAlphaType ReplaceAlphaWithStencil();

bool ShouldUseShaderBlending();
27 changes: 20 additions & 7 deletions GPU/GLES/Framebuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -784,7 +784,7 @@ void FramebufferManager::DoSetRenderFrameBuffer() {

// None found? Create one.
if (!vfb) {
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
textureCache_->ForgetLastTexture();
vfb = new VirtualFramebuffer();
vfb->fbo = 0;
vfb->fb_address = fb_address;
Expand Down Expand Up @@ -891,7 +891,7 @@ void FramebufferManager::DoSetRenderFrameBuffer() {
// Use it as a render target.
DEBUG_LOG(SCEGE, "Switching render target to FBO for %08x: %i x %i x %i ", vfb->fb_address, vfb->width, vfb->height, vfb->format);
vfb->usageFlags |= FB_USAGE_RENDERTARGET;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
textureCache_->ForgetLastTexture();
vfb->last_frame_render = gpuStats.numFlips;
frameLastFramebufUsed = gpuStats.numFlips;
vfb->dirtyAfterDisplay = true;
Expand Down Expand Up @@ -992,6 +992,7 @@ void FramebufferManager::BindFramebufferDepth(VirtualFramebuffer *sourceframebuf
// Let's only do this if not clearing.
if (!gstate.isModeClear() || !gstate.isClearModeDepthMask()) {
fbo_bind_for_read(sourceframebuffer->fbo);
glDisable(GL_SCISSOR_TEST);

#if defined(USING_GLES2) && (defined(ANDROID) || defined(BLACKBERRY)) // We only support this extension on Android, it's not even available on PC.
if (useNV) {
Expand All @@ -1000,13 +1001,19 @@ void FramebufferManager::BindFramebufferDepth(VirtualFramebuffer *sourceframebuf
#endif // defined(USING_GLES2) && (defined(ANDROID) || defined(BLACKBERRY))
glBlitFramebuffer(0, 0, sourceframebuffer->renderWidth, sourceframebuffer->renderHeight, 0, 0, targetframebuffer->renderWidth, targetframebuffer->renderHeight, GL_DEPTH_BUFFER_BIT, GL_NEAREST);
// If we set targetframebuffer->depthUpdated here, our optimization above would be pointless.

glstate.scissorTest.restore();
}
#endif
}
}
}

void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer) {
if (framebuffer == NULL) {
framebuffer = currentRenderVfb_;
}

if (!framebuffer->fbo || !useBufferedRendering_) {
glBindTexture(GL_TEXTURE_2D, 0);
gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
Expand Down Expand Up @@ -1041,6 +1048,7 @@ void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer) {

fbo_bind_as_render_target(renderCopy);
glViewport(0, 0, framebuffer->renderWidth, framebuffer->renderHeight);
glDisable(GL_SCISSOR_TEST);
fbo_bind_for_read(framebuffer->fbo);

#if defined(USING_GLES2) && (defined(ANDROID) || defined(BLACKBERRY)) // We only support this extension on Android, it's not even available on PC.
Expand All @@ -1052,6 +1060,8 @@ void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer) {

fbo_bind_as_render_target(currentRenderVfb_->fbo);
fbo_bind_color_as_texture(renderCopy, 0);
glstate.viewport.restore();
glstate.scissorTest.restore();
#endif
} else {
fbo_bind_color_as_texture(framebuffer->fbo, 0);
Expand Down Expand Up @@ -1238,7 +1248,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s
glEnable(GL_DITHER);
} else {
nvfb->usageFlags |= FB_USAGE_RENDERTARGET;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
textureCache_->ForgetLastTexture();
nvfb->last_frame_render = gpuStats.numFlips;
nvfb->dirtyAfterDisplay = true;

Expand Down Expand Up @@ -1317,7 +1327,7 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int
}

fbo_bind_as_render_target(dst->fbo);

glDisable(GL_SCISSOR_TEST);

#ifndef USING_GLES2
if (gl_extensions.FBO_ARB) {
Expand Down Expand Up @@ -1373,7 +1383,7 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int
// Make sure our 2D drawing program is ready. Compiles only if not already compiled.
CompileDraw2DProgram();

glstate.viewport.set(0, 0, dst->width, dst->height);
glViewport(0, 0, dst->width, dst->height);
DisableState();

// The first four coordinates are relative to the 6th and 7th arguments of DrawActiveTexture.
Expand All @@ -1382,8 +1392,11 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int
float srcH = src->height;
DrawActiveTexture(0, dstX, dstY, w, h, dst->width, dst->height, false, srcX / srcW, srcY / srcH, (srcX + w) / srcW, (srcY + h) / srcH, draw2dprogram_);
glBindTexture(GL_TEXTURE_2D, 0);
textureCache_->ForgetLastTexture();
}

glstate.scissorTest.restore();
glstate.viewport.restore();
fbo_unbind();
}

Expand Down Expand Up @@ -1911,7 +1924,7 @@ bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size) {
fbo_unbind();
}
glstate.viewport.restore();
gstate_c.textureChanged = TEXCHANGE_PARAMSONLY;
textureCache_->ForgetLastTexture();
// This is a memcpy, let's still copy just in case.
return false;
}
Expand Down Expand Up @@ -2045,7 +2058,7 @@ void FramebufferManager::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride,
fbo_unbind();
}
glstate.viewport.restore();
gstate_c.textureChanged = TEXCHANGE_PARAMSONLY;
textureCache_->ForgetLastTexture();
}
}
}
Expand Down
18 changes: 16 additions & 2 deletions GPU/GLES/GLES_GPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,8 @@ static const CommandTableEntry commandTable[] = {
{GE_CMD_STENCILTESTENABLE, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_ALPHABLENDENABLE, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_BLENDMODE, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_BLENDFIXEDA, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_BLENDFIXEDA, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_BlendFixA},
{GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_BlendFixB},
{GE_CMD_MASKRGB, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_ZTEST, FLAG_FLUSHBEFOREONCHANGE},
Expand Down Expand Up @@ -1080,6 +1080,14 @@ void GLES_GPU::Execute_ColorRef(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF);
}

// Handler for a blend fixed-color A command: flags DIRTY_BLENDFIX on the shader manager.
// Neither op nor diff is read here.
// NOTE(review): presumably this causes the blend-fix uniforms to be re-uploaded on the next
// draw - confirm against ShaderManager.
void GLES_GPU::Execute_BlendFixA(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_BLENDFIX);
}

// Handler for a blend fixed-color B command: flags DIRTY_BLENDFIX on the shader manager.
// It sets the same dirty flag as the fixed-color A handler; neither op nor diff is read here.
// NOTE(review): presumably this causes the blend-fix uniforms to be re-uploaded on the next
// draw - confirm against ShaderManager.
void GLES_GPU::Execute_BlendFixB(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_BLENDFIX);
}

void GLES_GPU::Execute_WorldMtxNum(u32 op, u32 diff) {
// This is almost always followed by GE_CMD_WORLDMATRIXDATA.
const u32_le *src = (const u32_le *)Memory::GetPointer(currentList->pc + 4);
Expand Down Expand Up @@ -1607,8 +1615,14 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
//////////////////////////////////////////////////////////////////
case GE_CMD_ALPHABLENDENABLE:
case GE_CMD_BLENDMODE:
break;

case GE_CMD_BLENDFIXEDA:
Execute_BlendFixA(op, diff);
break;

case GE_CMD_BLENDFIXEDB:
Execute_BlendFixB(op, diff);
break;

case GE_CMD_ALPHATESTENABLE:
Expand Down
Loading