Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use AVX2 gather for samplerjit #15275

Merged
merged 6 commits into from
Jan 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 19 additions & 13 deletions Common/x64Emitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1701,16 +1701,15 @@ void XEmitter::PSRLQ(X64Reg reg, int shift)
Write8(shift);
}

// PSRLQ with the shift count supplied through `arg` rather than an immediate.
// Hands prefix 0x66 and opcode 0xD3 to WriteSSEOp, with `reg` as the register
// operand and `arg` as the second operand.
void XEmitter::PSRLQ(X64Reg reg, OpArg arg) {
	WriteSSEOp(0x66, 0xD3, reg, arg);
}

// Immediate-count form of PSRLDQ on `reg`.
// Writes the SSE op (prefix 0x66, opcode 0x73) and then one trailing byte
// holding `shift`.
void XEmitter::PSRLDQ(X64Reg reg, int shift)
{
	// The value 3 is passed in the register-operand slot while the real target
	// register goes in as the R() operand; this matches the "/3" opcode
	// extension of the Intel encoding for PSRLDQ -- confirm against the SDM.
	const X64Reg opcodeExtension = static_cast<X64Reg>(3);
	WriteSSEOp(0x66, 0x73, opcodeExtension, R(reg));
	Write8(shift);
}

// PSRLW with the shift count supplied through `arg` rather than an immediate.
// Hands prefix 0x66 and opcode 0xD1 to WriteSSEOp.
void XEmitter::PSRLW(X64Reg reg, OpArg arg)
{
	WriteSSEOp(0x66, 0xD1, reg, arg);
}
// PSRLD with the shift count supplied through `arg` rather than an immediate.
// Hands prefix 0x66 and opcode 0xD2 to WriteSSEOp.
void XEmitter::PSRLD(X64Reg reg, OpArg arg)
{
	WriteSSEOp(0x66, 0xD2, reg, arg);
}
// PSRLQ with the shift count supplied through `arg` rather than an immediate.
// Hands prefix 0x66 and opcode 0xD3 to WriteSSEOp.
void XEmitter::PSRLQ(X64Reg reg, OpArg arg)
{
	WriteSSEOp(0x66, 0xD3, reg, arg);
}

void XEmitter::PSLLW(X64Reg reg, int shift)
{
WriteSSEOp(0x66, 0x71, (X64Reg)6, R(reg));
Expand All @@ -1734,6 +1733,10 @@ void XEmitter::PSLLDQ(X64Reg reg, int shift) {
Write8(shift);
}

// PSLLW with the shift count supplied through `arg` rather than an immediate.
// Hands prefix 0x66 and opcode 0xF1 to WriteSSEOp.
void XEmitter::PSLLW(X64Reg reg, OpArg arg)
{
	WriteSSEOp(0x66, 0xF1, reg, arg);
}
// PSLLD with the shift count supplied through `arg` rather than an immediate.
// Hands prefix 0x66 and opcode 0xF2 to WriteSSEOp.
void XEmitter::PSLLD(X64Reg reg, OpArg arg)
{
	WriteSSEOp(0x66, 0xF2, reg, arg);
}
// PSLLQ with the shift count supplied through `arg` rather than an immediate.
// Hands prefix 0x66 and opcode 0xF3 to WriteSSEOp.
void XEmitter::PSLLQ(X64Reg reg, OpArg arg)
{
	WriteSSEOp(0x66, 0xF3, reg, arg);
}

void XEmitter::PSRAW(X64Reg reg, int shift)
{
WriteSSEOp(0x66, 0x71, (X64Reg)4, R(reg));
Expand All @@ -1746,6 +1749,9 @@ void XEmitter::PSRAD(X64Reg reg, int shift)
Write8(shift);
}

// PSRAW with the shift count supplied through `arg` rather than an immediate.
// Hands prefix 0x66 and opcode 0xE1 to WriteSSEOp.
void XEmitter::PSRAW(X64Reg reg, OpArg arg)
{
	WriteSSEOp(0x66, 0xE1, reg, arg);
}
// PSRAD with the shift count supplied through `arg` rather than an immediate.
// Hands prefix 0x66 and opcode 0xE2 to WriteSSEOp.
void XEmitter::PSRAD(X64Reg reg, OpArg arg)
{
	WriteSSEOp(0x66, 0xE2, reg, arg);
}

// PMULLW dest, arg: hands prefix 0x66 and opcode 0xD5 to WriteSSEOp.
void XEmitter::PMULLW(X64Reg dest, const OpArg &arg)
{
	WriteSSEOp(0x66, 0xD5, dest, arg);
}
// PMULHW dest, arg: hands prefix 0x66 and opcode 0xE5 to WriteSSEOp.
void XEmitter::PMULHW(X64Reg dest, const OpArg &arg)
{
	WriteSSEOp(0x66, 0xE5, dest, arg);
}
// PMULHUW dest, arg: hands prefix 0x66 and opcode 0xE4 to WriteSSEOp.
void XEmitter::PMULHUW(X64Reg dest, const OpArg &arg)
{
	WriteSSEOp(0x66, 0xE4, dest, arg);
}
Expand Down Expand Up @@ -2240,20 +2246,20 @@ void XEmitter::VGATHERQPD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {
_assert_msg_(regOp1 != regOp2 && !arg.IsIndexedReg(regOp1) && !arg.IsIndexedReg(regOp2), "VGATHER cannot have overlapped registers");
WriteAVX2Op(bits, 0x66, 0x3893, regOp1, regOp2, arg);
}
void XEmitter::VGATHERDD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {
_assert_msg_(regOp1 != regOp2 && !arg.IsIndexedReg(regOp1) && !arg.IsIndexedReg(regOp2), "VGATHER cannot have overlapped registers");
// Emitter for VPGATHERDD: forwards prefix 0x66 and opcode word 0x3890 to WriteAVX2Op.
// `bits` is passed through unchanged (presumably the vector width -- confirm against WriteAVX2Op).
// Asserts first that regOp1 and regOp2 differ and that neither one is the
// indexed register of `arg`, since the gather may not use overlapping registers.
void XEmitter::VPGATHERDD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {
_assert_msg_(regOp1 != regOp2 && !arg.IsIndexedReg(regOp1) && !arg.IsIndexedReg(regOp2), "VPGATHER cannot have overlapped registers");
WriteAVX2Op(bits, 0x66, 0x3890, regOp1, regOp2, arg);
}
void XEmitter::VGATHERQD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {
_assert_msg_(regOp1 != regOp2 && !arg.IsIndexedReg(regOp1) && !arg.IsIndexedReg(regOp2), "VGATHER cannot have overlapped registers");
// Emitter for VPGATHERQD: forwards prefix 0x66 and opcode word 0x3891 to WriteAVX2Op.
// `bits` is passed through unchanged (presumably the vector width -- confirm against WriteAVX2Op).
// Asserts first that regOp1 and regOp2 differ and that neither one is the
// indexed register of `arg`, since the gather may not use overlapping registers.
void XEmitter::VPGATHERQD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {
_assert_msg_(regOp1 != regOp2 && !arg.IsIndexedReg(regOp1) && !arg.IsIndexedReg(regOp2), "VPGATHER cannot have overlapped registers");
WriteAVX2Op(bits, 0x66, 0x3891, regOp1, regOp2, arg);
}
void XEmitter::VGATHERDQ(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {
_assert_msg_(regOp1 != regOp2 && !arg.IsIndexedReg(regOp1) && !arg.IsIndexedReg(regOp2), "VGATHER cannot have overlapped registers");
// Emitter for VPGATHERDQ: forwards prefix 0x66 and opcode word 0x3890 to WriteAVX2Op,
// plus two trailing arguments (0, 1) that the 32-bit-element variant leaves out.
// NOTE(review): the trailing 1 presumably selects VEX.W=1 for 64-bit elements --
// confirm against WriteAVX2Op's parameter list.
// Asserts first that regOp1 and regOp2 differ and that neither one is the
// indexed register of `arg`, since the gather may not use overlapping registers.
void XEmitter::VPGATHERDQ(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {
_assert_msg_(regOp1 != regOp2 && !arg.IsIndexedReg(regOp1) && !arg.IsIndexedReg(regOp2), "VPGATHER cannot have overlapped registers");
WriteAVX2Op(bits, 0x66, 0x3890, regOp1, regOp2, arg, 0, 1);
}
void XEmitter::VGATHERQQ(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {
_assert_msg_(regOp1 != regOp2 && !arg.IsIndexedReg(regOp1) && !arg.IsIndexedReg(regOp2), "VGATHER cannot have overlapped registers");
// Emitter for VPGATHERQQ: forwards prefix 0x66 and opcode word 0x3891 to WriteAVX2Op,
// plus two trailing arguments (0, 1) that the 32-bit-element variant leaves out.
// NOTE(review): the trailing 1 presumably selects VEX.W=1 for 64-bit elements --
// confirm against WriteAVX2Op's parameter list.
// Asserts first that regOp1 and regOp2 differ and that neither one is the
// indexed register of `arg`, since the gather may not use overlapping registers.
void XEmitter::VPGATHERQQ(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {
_assert_msg_(regOp1 != regOp2 && !arg.IsIndexedReg(regOp1) && !arg.IsIndexedReg(regOp2), "VPGATHER cannot have overlapped registers");
WriteAVX2Op(bits, 0x66, 0x3891, regOp1, regOp2, arg, 0, 1);
}

Expand Down
25 changes: 20 additions & 5 deletions Common/x64Emitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -847,16 +847,31 @@ class XEmitter
void PSRLW(X64Reg reg, int shift);
void PSRLD(X64Reg reg, int shift);
void PSRLQ(X64Reg reg, int shift);
void PSRLQ(X64Reg reg, OpArg arg);
void PSRLDQ(X64Reg reg, int shift);
// Note: every element is shifted by the same count, taken from the low 64 bits of the XMM arg.
void PSRLW(X64Reg reg, OpArg arg);
// Note: every element is shifted by the same count, taken from the low 64 bits of the XMM arg.
void PSRLD(X64Reg reg, OpArg arg);
// Note: both elements are shifted by the same count, taken from the low 64 bits of the XMM arg.
void PSRLQ(X64Reg reg, OpArg arg);

void PSLLW(X64Reg reg, int shift);
void PSLLD(X64Reg reg, int shift);
void PSLLQ(X64Reg reg, int shift);
void PSLLDQ(X64Reg reg, int shift);
// Note: every element is shifted by the same count, taken from the low 64 bits of the XMM arg.
void PSLLW(X64Reg reg, OpArg arg);
// Note: every element is shifted by the same count, taken from the low 64 bits of the XMM arg.
void PSLLD(X64Reg reg, OpArg arg);
// Note: both elements are shifted by the same count, taken from the low 64 bits of the XMM arg.
void PSLLQ(X64Reg reg, OpArg arg);

void PSRAW(X64Reg reg, int shift);
void PSRAD(X64Reg reg, int shift);
// Note: every element is shifted by the same count, taken from the low 64 bits of the XMM arg.
void PSRAW(X64Reg reg, OpArg arg);
// Note: every element is shifted by the same count, taken from the low 64 bits of the XMM arg.
void PSRAD(X64Reg reg, OpArg arg);

void PMULLW(X64Reg dest, const OpArg &arg);
void PMULHW(X64Reg dest, const OpArg &arg);
Expand Down Expand Up @@ -1239,10 +1254,10 @@ class XEmitter
void VGATHERDPD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
void VGATHERQPS(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
void VGATHERQPD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
void VGATHERDD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
void VGATHERQD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
void VGATHERDQ(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
void VGATHERQQ(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
void VPGATHERDD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
void VPGATHERQD(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
void VPGATHERDQ(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
void VPGATHERQQ(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);

void VPSLLVD(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VPSLLVQ(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
Expand Down
31 changes: 20 additions & 11 deletions GPU/Software/FuncId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@

#include "Common/Data/Convert/ColorConv.h"
#include "Common/StringUtils.h"
#include "GPU/Software/FuncId.h"
#include "Core/MemMap.h"
#include "GPU/Common/TextureDecoder.h"
#include "GPU/GPUState.h"
#include "GPU/Software/FuncId.h"

static_assert(sizeof(SamplerID) == sizeof(SamplerID::fullKey), "Bad sampler ID size");
#ifdef SOFTPIXEL_USE_CACHE
Expand Down Expand Up @@ -361,18 +363,24 @@ void ComputeSamplerID(SamplerID *id_out) {
SamplerID id{};

id.useStandardBufw = true;
id.hasStandardMips = true;
id.overReadSafe = true;
int maxLevel = gstate.isMipmapEnabled() ? gstate.getTextureMaxLevel() : 0;
int lastWidth = -1;
for (int i = 0; i <= maxLevel; ++i) {
if (gstate.getTextureAddress(i) == 0)
uint32_t addr = gstate.getTextureAddress(i);
if (!Memory::IsValidAddress(addr))
id.hasInvalidPtr = true;

int bufw = GetTextureBufw(i, addr, gstate.getTextureFormat());
int bitspp = textureBitsPerPixel[gstate.getTextureFormat()];
// We use a 16 byte minimum for all small bufws, so allow those as standard.
int w = gstate.getTextureWidth(i);
if (w != (gstate.texbufwidth[i] & 0x00001FFF))
if (w != bufw && w * bitspp > 128)
id.useStandardBufw = false;
if (lastWidth != -1 && lastWidth != w * 2)
id.hasStandardMips = false;
lastWidth = w;

int h = gstate.getTextureHeight(i);
int bytes = h * (bufw * bitspp) / 8;
if (bitspp < 32 && !Memory::IsValidAddress(addr + bytes + (32 - bitspp) / 8))
id.overReadSafe = false;
}
id.hasAnyMips = maxLevel != 0;

Expand Down Expand Up @@ -458,9 +466,10 @@ std::string DescribeSamplerID(const SamplerID &id) {
if (!id.useStandardBufw) {
name += ":BUFW";
}
if (!id.hasStandardMips) {
name += ":XMIP";
} else if (id.hasAnyMips) {
if (!id.overReadSafe) {
name += ":XRD";
}
if (id.hasAnyMips) {
name += ":MIP";
}
if (id.linear) {
Expand Down
2 changes: 1 addition & 1 deletion GPU/Software/FuncId.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,8 @@ struct SamplerID {
uint8_t texFunc : 3;
bool useTextureAlpha : 1;
bool useColorDoubling : 1;
bool hasStandardMips : 1;
bool hasAnyMips : 1;
bool overReadSafe : 1;
bool fetch : 1;
};
};
Expand Down
1 change: 1 addition & 0 deletions GPU/Software/RasterizerRegCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ struct RegCache {
VEC_RESULT1 = 0x0002,
VEC_U1 = 0x0003,
VEC_V1 = 0x0004,
VEC_INDEX = 0x0005,

GEN_SRC_ALPHA = 0x0100,
GEN_GSTATE = 0x0101,
Expand Down
4 changes: 4 additions & 0 deletions GPU/Software/Sampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ class SamplerJitCache : public Rasterizer::CodeBlock {
bool Jit_PrepareDataOffsets(const SamplerID &id, Rasterizer::RegCache::Reg uReg, Rasterizer::RegCache::Reg vReg, bool level1);
bool Jit_PrepareDataDirectOffsets(const SamplerID &id, Rasterizer::RegCache::Reg uReg, Rasterizer::RegCache::Reg vReg, bool level1, int bitsPerTexel);
bool Jit_PrepareDataSwizzledOffsets(const SamplerID &id, Rasterizer::RegCache::Reg uReg, Rasterizer::RegCache::Reg vReg, bool level1, int bitsPerTexel);
bool Jit_FetchQuad(const SamplerID &id, bool level1);
bool Jit_GetDataQuad(const SamplerID &id, bool level1, int bitsPerTexel);
bool Jit_TransformClutIndexQuad(const SamplerID &id, int bitsPerIndex);
bool Jit_ReadClutQuad(const SamplerID &id, bool level1);
bool Jit_BlendQuad(const SamplerID &id, bool level1);
bool Jit_DecodeQuad(const SamplerID &id, bool level1);
bool Jit_Decode5650Quad(const SamplerID &id, Rasterizer::RegCache::Reg quadReg);
Expand Down
Loading