Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Always prescale uv ("texcoord speedhack") #9176

Merged
merged 3 commits into from
Dec 20, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Core/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -730,7 +730,6 @@ static ConfigSetting debuggerSettings[] = {
};

static ConfigSetting speedHackSettings[] = {
ReportedConfigSetting("PrescaleUVCoords", &g_Config.bPrescaleUV, true, true, true),
ReportedConfigSetting("DisableAlphaTest", &g_Config.bDisableAlphaTest, false, true, true),

ConfigSetting(false),
Expand Down
9 changes: 1 addition & 8 deletions Core/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -360,14 +360,7 @@ struct Config {
float fAnalogLimiterDeadzone;
// GLES backend-specific hacks. Not saved to the ini file, do not add checkboxes. Will be made into
// proper options when good enough.
// PrescaleUV:
// * Applies UV scale/offset when decoding verts. Get rid of some work in the vertex shader,
// saves a uniform upload and is a prerequisite for future optimized hybrid
// (SW skinning, HW transform) skinning.
// * Still has major problems so off by default - need to store tex scale/offset per DeferredDrawCall,
// which currently isn't done so if texscale/offset isn't static (like in Tekken 6) things go wrong.
bool bPrescaleUV;
bool bDisableAlphaTest; // Helps PowerVR immensely, breaks some graphics
bool bDisableAlphaTest; // Helps PowerVR performance immensely, breaks some graphics
// End GLES hacks.

// Use the hardware scaler to scale up the image to save fillrate. Similar to Windows' window size, really.
Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

#define QUAD_INDICES_MAX 65536

DrawEngineCommon::DrawEngineCommon() : dec_(nullptr) {
// Constructs the shared draw engine state. dec_ starts out null and decOptions_ is
// value-initialized (decOptions_{}), so its members start zeroed/false.
DrawEngineCommon::DrawEngineCommon() : dec_(nullptr), decOptions_{} {
// Index buffer holding 6 * QUAD_INDICES_MAX u16 entries
// (presumably six indices per quad, i.e. two triangles - confirm against the users of quadIndices_).
quadIndices_ = new u16[6 * QUAD_INDICES_MAX];
// NOTE(review): both members are allocated with raw new; the matching release is not visible here.
decJitCache_ = new VertexDecoderJitCache();
}
Expand Down
6 changes: 0 additions & 6 deletions GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ std::string VertexShaderDesc(const ShaderID &id) {
}
if (id.Bits(VS_BIT_MATERIAL_UPDATE, 3)) desc << "MatUp:" << id.Bits(VS_BIT_MATERIAL_UPDATE, 3) << " ";
if (id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2)) desc << "WScale " << id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2) << " ";
if (id.Bits(VS_BIT_TEXCOORD_FMTSCALE, 2)) desc << "TCScale " << id.Bits(VS_BIT_TEXCOORD_FMTSCALE, 2) << " ";
if (id.Bit(VS_BIT_FLATSHADE)) desc << "Flat ";

// TODO: More...
Expand Down Expand Up @@ -119,11 +118,6 @@ void ComputeVertexShaderID(ShaderID *id_out, u32 vertType, bool useHWTransform)

id.SetBit(VS_BIT_NORM_REVERSE, gstate.areNormalsReversed());
id.SetBit(VS_BIT_HAS_TEXCOORD, hasTexcoord);
if (doTextureProjection && gstate.getUVProjMode() == GE_PROJMAP_UV) {
id.SetBits(VS_BIT_TEXCOORD_FMTSCALE, 2, (vertType & GE_VTYPE_TC_MASK) >> GE_VTYPE_TC_SHIFT); // two bits
} else {
id.SetBits(VS_BIT_TEXCOORD_FMTSCALE, 2, 3); // float - no scaling
}
}

id.SetBit(VS_BIT_FLATSHADE, doFlatShading);
Expand Down
1 change: 0 additions & 1 deletion GPU/Common/ShaderId.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ enum {
VS_BIT_LIGHT3_ENABLE = 55,
VS_BIT_LIGHTING_ENABLE = 56,
VS_BIT_WEIGHT_FMTSCALE = 57, // only two bits, 1 free after
VS_BIT_TEXCOORD_FMTSCALE = 60,
VS_BIT_FLATSHADE = 62, // 1 free after
};

Expand Down
14 changes: 3 additions & 11 deletions GPU/Common/SoftwareTransformCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,12 +146,9 @@ void SoftwareTransform(

float uscale = 1.0f;
float vscale = 1.0f;
bool scaleUV = false;
if (throughmode) {
uscale /= gstate_c.curTextureWidth;
vscale /= gstate_c.curTextureHeight;
} else {
scaleUV = !g_Config.bPrescaleUV;
}

bool skinningEnabled = vertTypeIsSkinningEnabled(vertType);
Expand Down Expand Up @@ -317,14 +314,9 @@ void SoftwareTransform(
switch (gstate.getUVGenMode()) {
case GE_TEXMAP_TEXTURE_COORDS: // UV mapping
case GE_TEXMAP_UNKNOWN: // Seen in Riviera. Unsure of meaning, but this works.
// Texture scale/offset is only performed in this mode.
if (scaleUV) {
uv[0] = ruv[0]*gstate_c.uv.uScale + gstate_c.uv.uOff;
uv[1] = ruv[1]*gstate_c.uv.vScale + gstate_c.uv.vOff;
} else {
uv[0] = ruv[0];
uv[1] = ruv[1];
}
// We always prescale in the vertex decoder now.
uv[0] = ruv[0];
uv[1] = ruv[1];
uv[2] = 1.0f;
break;

Expand Down
12 changes: 6 additions & 6 deletions GPU/Common/SplineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -864,21 +864,21 @@ void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indi
u32 vertTypeWithIndex16 = (vertType & ~GE_VTYPE_IDX_MASK) | GE_VTYPE_IDX_16BIT;

UVScale prevUVScale;
if (g_Config.bPrescaleUV && (origVertType & GE_VTYPE_TC_MASK) != 0) {
if ((origVertType & GE_VTYPE_TC_MASK) != 0) {
// We scaled during Normalize already so let's turn it off when drawing.
prevUVScale = gstate_c.uv;
gstate_c.uv.uScale = 1.0f;
gstate_c.uv.vScale = 1.0f;
gstate_c.uv.uOff = 0;
gstate_c.uv.vOff = 0;
gstate_c.uv.uOff = 0.0f;
gstate_c.uv.vOff = 0.0f;
}

int generatedBytesRead;
DispatchSubmitPrim(splineBuffer, quadIndices_, primType[prim_type], count, vertTypeWithIndex16, &generatedBytesRead);

DispatchFlush();

if (g_Config.bPrescaleUV && (origVertType & GE_VTYPE_TC_MASK) != 0) {
if ((origVertType & GE_VTYPE_TC_MASK) != 0) {
gstate_c.uv = prevUVScale;
}
}
Expand Down Expand Up @@ -979,7 +979,7 @@ void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indi
u32 vertTypeWithIndex16 = (vertType & ~GE_VTYPE_IDX_MASK) | GE_VTYPE_IDX_16BIT;

UVScale prevUVScale;
if (g_Config.bPrescaleUV && (origVertType & GE_VTYPE_TC_MASK) != 0) {
if (origVertType & GE_VTYPE_TC_MASK) {
// We scaled during Normalize already so let's turn it off when drawing.
prevUVScale = gstate_c.uv;
gstate_c.uv.uScale = 1.0f;
Expand All @@ -993,7 +993,7 @@ void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indi

DispatchFlush();

if (g_Config.bPrescaleUV && (origVertType & GE_VTYPE_TC_MASK) != 0) {
if (origVertType & GE_VTYPE_TC_MASK) {
gstate_c.uv = prevUVScale;
}
}
17 changes: 0 additions & 17 deletions GPU/Common/VertexDecoderArm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,6 @@ static const JitLookup jitLookup[] = {
{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},

{&VertexDecoder::Step_TcU8, &VertexDecoderJitCache::Jit_TcU8},
{&VertexDecoder::Step_TcU16, &VertexDecoderJitCache::Jit_TcU16},
{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
{&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double},

Expand Down Expand Up @@ -563,21 +561,6 @@ void VertexDecoderJitCache::Jit_WeightsFloatSkin() {
Jit_ApplyWeights();
}

// Emits code that copies a pair of 8-bit texcoords into the decoded vertex.
// The two components are loaded as separate bytes and packed into the low 16 bits of
// one register. Storing the full 32-bit word then also fills the remaining two bytes
// of the destination with zeroes (assuming the byte loads zero-extend, as ARM LDRB does).
void VertexDecoderJitCache::Jit_TcU8() {
// First component, at tcoff.
LDRB(tempReg1, srcReg, dec_->tcoff);
// Second component, at tcoff + 1.
LDRB(tempReg2, srcReg, dec_->tcoff + 1);
// tempReg1 |= tempReg2 << 8
ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 8));
// Write the packed value at the decoded format's UV offset.
STR(tempReg1, dstReg, dec_->decFmt.uvoff);
}

// Emits code that copies a pair of 16-bit texcoords into the decoded vertex without
// converting them. The two halfwords are loaded separately, merged into one register
// (second component in the upper 16 bits) and written with a single word store.
void VertexDecoderJitCache::Jit_TcU16() {
// First component, at tcoff.
LDRH(tempReg1, srcReg, dec_->tcoff);
// Second component, at tcoff + 2.
LDRH(tempReg2, srcReg, dec_->tcoff + 2);
// tempReg1 |= tempReg2 << 16
ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 16));
// Write the packed value at the decoded format's UV offset.
STR(tempReg1, dstReg, dec_->decFmt.uvoff);
}

void VertexDecoderJitCache::Jit_TcFloat() {
LDR(tempReg1, srcReg, dec_->tcoff);
LDR(tempReg2, srcReg, dec_->tcoff + 4);
Expand Down
12 changes: 0 additions & 12 deletions GPU/Common/VertexDecoderArm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,6 @@ static const JitLookup jitLookup[] = {
{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},

{&VertexDecoder::Step_TcU8, &VertexDecoderJitCache::Jit_TcU8},
{&VertexDecoder::Step_TcU16, &VertexDecoderJitCache::Jit_TcU16},
{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
{&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double},
{&VertexDecoder::Step_TcU8Prescale, &VertexDecoderJitCache::Jit_TcU8Prescale},
Expand Down Expand Up @@ -578,16 +576,6 @@ void VertexDecoderJitCache::Jit_Color5551() {
CSEL(fullAlphaReg, fullAlphaReg, WZR, CC_EQ);
}

// Emits code that copies a pair of 8-bit texcoords into the decoded vertex.
// Both bytes are fetched with a single halfword load from tcoff and then stored at the
// decoded format's UV offset.
// NOTE(review): presumably tempReg1 is a 32-bit register, so the store also zero-fills
// the two bytes following the texcoords - confirm against the register definitions.
void VertexDecoderJitCache::Jit_TcU8() {
LDURH(tempReg1, srcReg, dec_->tcoff);
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff);
}

// Emits code that copies a pair of 16-bit texcoords into the decoded vertex without
// converting them: one load from tcoff followed by one store at the decoded format's
// UV offset.
// NOTE(review): presumably tempReg1 is a 32-bit register so both halfwords move in a
// single access - confirm against the register definitions.
void VertexDecoderJitCache::Jit_TcU16() {
LDUR(tempReg1, srcReg, dec_->tcoff);
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff);
}

void VertexDecoderJitCache::Jit_TcU16Through() {
LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->tcoff);
LDRH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 2);
Expand Down
83 changes: 9 additions & 74 deletions GPU/Common/VertexDecoderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,14 +264,6 @@ void VertexDecoder::Step_WeightsFloatSkin() const
}
}

void VertexDecoder::Step_TcU8() const
{
// u32 to write two bytes of zeroes for free.
u32 *uv = (u32*)(decoded_ + decFmt.uvoff);
const u16 *uvdata = (const u16*)(ptr_ + tcoff);
*uv = *uvdata;
}

void VertexDecoder::Step_TcU8ToFloat() const
{
// Expands the 8-bit texcoords to floats, scaling each by 1/128.
Expand All @@ -281,14 +273,6 @@ void VertexDecoder::Step_TcU8ToFloat() const
uv[1] = uvdata[1] * (1.0f / 128.0f);
}

void VertexDecoder::Step_TcU16() const
{
u32 *uv = (u32 *)(decoded_ + decFmt.uvoff);
// TODO: Fix big-endian without losing the optimization
const u32 *uvdata = (const u32*)(ptr_ + tcoff);
*uv = *uvdata;
}

void VertexDecoder::Step_TcU16ToFloat() const
{
float *uv = (float *)(decoded_ + decFmt.uvoff);
Expand Down Expand Up @@ -903,13 +887,6 @@ static const StepFunction wtstep_skin[4] = {
&VertexDecoder::Step_WeightsFloatSkin,
};

// Texcoord steps that keep the source format (no conversion to float).
// Indexed by the texcoord format taken from the vertex type.
static const StepFunction tcstep[4] = {
	nullptr,                       // 0: no step
	&VertexDecoder::Step_TcU8,     // 1: 8-bit
	&VertexDecoder::Step_TcU16,    // 2: 16-bit
	&VertexDecoder::Step_TcFloat,  // 3: float
};

static const StepFunction tcstepToFloat[4] = {
0,
&VertexDecoder::Step_TcU8ToFloat,
Expand Down Expand Up @@ -973,42 +950,20 @@ static const StepFunction tcstep_morph_remasterToFloat[4] = {
&VertexDecoder::Step_TcFloatMorph,
};

// Texcoord steps for through mode that keep the source format.
// Indexed by the texcoord format taken from the vertex type.
static const StepFunction tcstep_through[4] = {
	nullptr,                              // 0: no step
	&VertexDecoder::Step_TcU8,            // 1: 8-bit (same step as non-through)
	&VertexDecoder::Step_TcU16Through,    // 2: 16-bit
	&VertexDecoder::Step_TcFloatThrough,  // 3: float
};

// Texcoord steps for through mode that expand the coordinates to float.
// Indexed by the texcoord format taken from the vertex type.
static const StepFunction tcstep_throughToFloat[4] = {
	nullptr,                                   // 0: no step
	&VertexDecoder::Step_TcU8ToFloat,          // 1: 8-bit
	&VertexDecoder::Step_TcU16ThroughToFloat,  // 2: 16-bit
	&VertexDecoder::Step_TcFloatThrough,       // 3: float
};

// Some HD Remaster games double the u16 texture coordinates, so the 16-bit entry
// uses the doubling step. Source format is kept (no conversion to float).
// Indexed by the texcoord format taken from the vertex type.
static const StepFunction tcstep_remaster[4] = {
	nullptr,                           // 0: no step
	&VertexDecoder::Step_TcU8,         // 1: 8-bit
	&VertexDecoder::Step_TcU16Double,  // 2: 16-bit, doubled
	&VertexDecoder::Step_TcFloat,      // 3: float
};

// Remaster variant (doubled 16-bit texcoords) that expands the coordinates to float.
// Indexed by the texcoord format taken from the vertex type.
static const StepFunction tcstep_remasterToFloat[4] = {
	nullptr,                                  // 0: no step
	&VertexDecoder::Step_TcU8ToFloat,         // 1: 8-bit
	&VertexDecoder::Step_TcU16DoubleToFloat,  // 2: 16-bit, doubled
	&VertexDecoder::Step_TcFloat,             // 3: float
};

// Through-mode remaster variant (doubled 16-bit texcoords) that keeps the source format.
// Indexed by the texcoord format taken from the vertex type.
static const StepFunction tcstep_through_remaster[4] = {
	nullptr,                                  // 0: no step
	&VertexDecoder::Step_TcU8,                // 1: 8-bit
	&VertexDecoder::Step_TcU16ThroughDouble,  // 2: 16-bit, doubled
	&VertexDecoder::Step_TcFloatThrough,      // 3: float
};

static const StepFunction tcstep_through_remasterToFloat[4] = {
0,
&VertexDecoder::Step_TcU8ToFloat,
Expand Down Expand Up @@ -1173,41 +1128,21 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
biggest = tcalign[tc];

// NOTE: Because we check getUVGenMode here, it must be included in the decoder ID!
if (g_Config.bPrescaleUV && !throughmode && (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_COORDS || gstate.getUVGenMode() == GE_TEXMAP_UNKNOWN)) {
if (!throughmode && (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_COORDS || gstate.getUVGenMode() == GE_TEXMAP_UNKNOWN)) {
if (g_DoubleTextureCoordinates)
steps_[numSteps_++] = morphcount == 1 ? tcstep_prescale_remaster[tc] : tcstep_prescale_morph_remaster[tc];
else
steps_[numSteps_++] = morphcount == 1 ? tcstep_prescale[tc] : tcstep_prescale_morph[tc];
decFmt.uvfmt = DEC_FLOAT_2;
} else {
if (options.expandAllUVtoFloat) {
if (morphcount != 1 && !throughmode)
steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remasterToFloat[tc] : tcstep_morphToFloat[tc];
else if (g_DoubleTextureCoordinates)
steps_[numSteps_++] = throughmode ? tcstep_through_remasterToFloat[tc] : tcstep_remasterToFloat[tc];
else
steps_[numSteps_++] = throughmode ? tcstep_throughToFloat[tc] : tcstepToFloat[tc];
decFmt.uvfmt = DEC_FLOAT_2;
} else {
if (morphcount != 1 && !throughmode)
steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remaster[tc] : tcstep_morph[tc];
else if (g_DoubleTextureCoordinates)
steps_[numSteps_++] = throughmode ? tcstep_through_remaster[tc] : tcstep_remaster[tc];
else
steps_[numSteps_++] = throughmode ? tcstep_through[tc] : tcstep[tc];

switch (tc) {
case GE_VTYPE_TC_8BIT >> GE_VTYPE_TC_SHIFT:
decFmt.uvfmt = throughmode ? DEC_U8A_2 : DEC_U8_2;
break;
case GE_VTYPE_TC_16BIT >> GE_VTYPE_TC_SHIFT:
decFmt.uvfmt = throughmode ? DEC_U16A_2 : DEC_U16_2;
break;
case GE_VTYPE_TC_FLOAT >> GE_VTYPE_TC_SHIFT:
decFmt.uvfmt = DEC_FLOAT_2;
break;
}
}
// We now always expand UV to float.
if (morphcount != 1 && !throughmode)
steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remasterToFloat[tc] : tcstep_morphToFloat[tc];
else if (g_DoubleTextureCoordinates)
steps_[numSteps_++] = throughmode ? tcstep_through_remasterToFloat[tc] : tcstep_remasterToFloat[tc];
else
steps_[numSteps_++] = throughmode ? tcstep_throughToFloat[tc] : tcstepToFloat[tc];
decFmt.uvfmt = DEC_FLOAT_2;
}

decFmt.uvoff = decOff;
Expand Down
5 changes: 0 additions & 5 deletions GPU/Common/VertexDecoderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,6 @@ int TranslateNumBones(int bones);
typedef void(*JittedVertexDecoder)(const u8 *src, u8 *dst, int count);

// Options passed to VertexDecoder::SetVertexType that control how the decoded
// vertex format is chosen.
struct VertexDecoderOptions {
// When set, texcoords are decoded with the *ToFloat steps and the decoded UV format is DEC_FLOAT_2.
bool expandAllUVtoFloat;
// Presumably requests that skinning weights be expanded to float - usage not visible here, confirm.
bool expandAllWeightsToFloat;
// Presumably requests that 8-bit normals be expanded to float - usage not visible here, confirm.
bool expand8BitNormalsToFloat;
};
Expand Down Expand Up @@ -477,8 +476,6 @@ class VertexDecoder {
void Step_WeightsU16Skin() const;
void Step_WeightsFloatSkin() const;

void Step_TcU8() const;
void Step_TcU16() const;
void Step_TcU8ToFloat() const;
void Step_TcU16ToFloat() const;
void Step_TcFloat() const;
Expand Down Expand Up @@ -633,9 +630,7 @@ class VertexDecoderJitCache : public FakeGen::FakeXCodeBlock {
void Jit_WeightsU16Skin();
void Jit_WeightsFloatSkin();

void Jit_TcU8();
void Jit_TcU8ToFloat();
void Jit_TcU16();
void Jit_TcU16ToFloat();
void Jit_TcFloat();

Expand Down
13 changes: 0 additions & 13 deletions GPU/Common/VertexDecoderX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,6 @@ static const JitLookup jitLookup[] = {
{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},

{&VertexDecoder::Step_TcU8, &VertexDecoderJitCache::Jit_TcU8},
{&VertexDecoder::Step_TcU16, &VertexDecoderJitCache::Jit_TcU16},
{&VertexDecoder::Step_TcU8ToFloat, &VertexDecoderJitCache::Jit_TcU8ToFloat},
{&VertexDecoder::Step_TcU16ToFloat, &VertexDecoderJitCache::Jit_TcU16ToFloat},
{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
Expand Down Expand Up @@ -687,17 +685,6 @@ void VertexDecoderJitCache::Jit_WeightsFloatSkin() {
}
}

// Emits code that copies a pair of 8-bit texcoords into the decoded vertex.
// Both bytes are read as one 16-bit value and zero-extended to 32 bits by MOVZX, so the
// 32-bit store also fills the last two bytes with zeroes, keeping the output aligned to 4 bytes.
void VertexDecoderJitCache::Jit_TcU8() {
// 16-bit load from tcoff, zero-extended into the 32-bit tempReg1.
MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->tcoff));
// 32-bit store at the decoded format's UV offset.
MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
}

// Emits code that copies a pair of 16-bit texcoords into the decoded vertex without
// converting them: both components move together as one 32-bit value.
void VertexDecoderJitCache::Jit_TcU16() {
// 32-bit load of both components from tcoff.
MOV(32, R(tempReg1), MDisp(srcReg, dec_->tcoff));
// 32-bit store at the decoded format's UV offset.
MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
}

void VertexDecoderJitCache::Jit_TcU8ToFloat() {
Jit_AnyU8ToFloat(dec_->tcoff, 16);
MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), XMM3);
Expand Down
Loading