Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delete a lot of specialized alpha checking code. #15482

Merged
merged 5 commits into from
Apr 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Core/TextureReplacer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -839,6 +839,7 @@ void ReplacedTexture::PrepareData(int level) {

int w, h, f;
uint8_t *image;

if (LoadZIMPtr(&zim[0], zimSize, &w, &h, &f, &image)) {
if (w > info.w || h > info.h) {
ERROR_LOG(G3D, "Texture replacement changed since header read: %s", info.file.c_str());
Expand All @@ -857,7 +858,7 @@ void ReplacedTexture::PrepareData(int level) {
free(image);
}

CheckAlphaResult res = CheckAlphaRGBA8888Basic((u32 *)&out[0], info.w, w, h);
CheckAlphaResult res = CheckAlpha32Rect((u32 *)&out[0], info.w, w, h, 0xFF000000);
if (res == CHECKALPHA_ANY || level == 0) {
alphaStatus_ = ReplacedTextureAlpha(res);
}
Expand Down Expand Up @@ -897,7 +898,7 @@ void ReplacedTexture::PrepareData(int level) {

if (!checkedAlpha) {
// This will only check the hashed bits.
CheckAlphaResult res = CheckAlphaRGBA8888Basic((u32 *)&out[0], info.w, png.width, png.height);
CheckAlphaResult res = CheckAlpha32Rect((u32 *)&out[0], info.w, png.width, png.height, 0xFF000000);
if (res == CHECKALPHA_ANY || level == 0) {
alphaStatus_ = ReplacedTextureAlpha(res);
}
Expand Down
112 changes: 0 additions & 112 deletions GPU/Common/TextureCacheCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1430,118 +1430,6 @@ inline u32 TfmtRawToFullAlpha(GETextureFormat fmt) {
return 0;
}

#ifdef _M_SSE
// Folds the four 32-bit lanes of value into one u32 by ANDing them all together.
//
// _mm_srli_si128 shifts the whole register right by a count given in BYTES, not bits,
// and any count above 15 produces an all-zero vector.  The counts therefore have to be
// 8 (two lanes) and 4 (one lane); bit-style counts such as 64/32 would zero the operand
// and make the AND always come out as 0.
//
// TODO: Should use a shuffle instead of srli, probably.
inline u32 SSEReduce32And(__m128i value) {
	// Lanes 0 and 1 pick up lanes 2 and 3.
	value = _mm_and_si128(value, _mm_srli_si128(value, 8));
	// Lane 0 picks up lane 1, so it now holds the AND of all four lanes.
	value = _mm_and_si128(value, _mm_srli_si128(value, 4));
	return _mm_cvtsi128_si32(value);
}
// Folds the eight 16-bit lanes of value into one result by ANDing them all together.
// The combined value ends up in the low 16 bits of the return value.
//
// _mm_srli_si128 shifts the whole register right by a count given in BYTES, not bits,
// and any count above 15 produces an all-zero vector.  The counts therefore have to be
// 8, 4 and 2 bytes; bit-style counts such as 64/32/16 would zero the operand and make
// the AND always come out as 0.
//
// TODO: Should use a shuffle instead of srli, probably.
inline u32 SSEReduce16And(__m128i value) {
	// 16-bit lanes 0..3 pick up lanes 4..7.
	value = _mm_and_si128(value, _mm_srli_si128(value, 8));
	// Lanes 0..1 pick up lanes 2..3.
	value = _mm_and_si128(value, _mm_srli_si128(value, 4));
	// Lane 0 picks up lane 1, so it now holds the AND of all eight lanes.
	value = _mm_and_si128(value, _mm_srli_si128(value, 2));
	return _mm_cvtsi128_si32(value);
}
#endif

#if PPSSPP_ARCH(ARM_NEON)
// ANDs the four 32-bit lanes of value together and returns the combined result.
inline u32 NEONReduce32And(uint32x4_t value) {
	// Fold the upper pair of lanes onto the lower pair, then combine the two survivors.
	const uint32x2_t folded = vand_u32(vget_low_u32(value), vget_high_u32(value));
	return vget_lane_u32(folded, 0) & vget_lane_u32(folded, 1);
}
#endif

// Copies width u16 values from src to dst while ANDing every copied value together,
// then ANDs that combined value into *outMask.  *outMask is only narrowed here, never
// initialized, so the caller is expected to seed it.
//
// TODO: SSE/SIMD
// At least on x86, compiler actually SIMDs these pretty well.
void CopyAndSumMask16(u16 *dst, const u16 *src, int width, u32 *outMask) {
	u16 combined = 0xFFFF;
	int x = 0;
	while (x < width) {
		const u16 value = src[x];
		dst[x] = value;
		combined &= value;
		++x;
	}
	*outMask &= (u32)combined;
}

// Copies width u32 values from src to dst while ANDing every copied value together,
// then ANDs that combined value into *outMask.  *outMask is only narrowed here, never
// initialized, so the caller is expected to seed it.
//
// Used in video playback so nice to have being fast: groups of four values go through
// an SSE or NEON path when available, and the scalar loop handles whatever is left.
void CopyAndSumMask32(u32 *dst, const u32 *src, int width, u32 *outMask) {
	u32 combined = 0xFFFFFFFF;
#ifdef _M_SSE
	if (width >= 4) {
		__m128i combined4 = _mm_set1_epi32(0xFFFFFFFF);
		// Unaligned load/store: nothing here guarantees 16-byte alignment.
		for (; width >= 4; width -= 4, src += 4, dst += 4) {
			const __m128i values = _mm_loadu_si128((__m128i *)src);
			_mm_storeu_si128((__m128i *)dst, values);
			combined4 = _mm_and_si128(combined4, values);
		}
		combined = SSEReduce32And(combined4);
	}
#elif PPSSPP_ARCH(ARM_NEON)
	if (width >= 4) {
		uint32x4_t combined4 = vdupq_n_u32(0xFFFFFFFF);
		for (; width >= 4; width -= 4, src += 4, dst += 4) {
			const uint32x4_t values = vld1q_u32(src);
			vst1q_u32(dst, values);
			combined4 = vandq_u32(combined4, values);
		}
		combined = NEONReduce32And(combined4);
	}
#endif

	// Scalar tail (or the whole row when no SIMD path was compiled in / taken).
	while (width > 0) {
		const u32 value = *src++;
		*dst++ = value;
		combined &= value;
		--width;
	}
	*outMask &= (u32)combined;
}

void CheckMask16(const u16 *src, int width, u32 *outMask) {
u16 mask = 0xFFFF;
for (int i = 0; i < width; i++) {
mask &= src[i];
}
*outMask &= (u32)mask;
}

void CheckMask32(const u32 *src, int width, u32 *outMask) {
u32 mask = 0xFFFFFFFF;
#ifdef _M_SSE
if (width >= 4) {
__m128i wideMask = _mm_set1_epi32(0xFFFFFFFF);
while (width >= 4) {
wideMask = _mm_and_si128(wideMask, _mm_loadu_si128((__m128i *)src));
src += 4;
width -= 4;
}
mask = SSEReduce32And(wideMask);
}
#elif PPSSPP_ARCH(ARM_NEON)
if (width >= 4) {
uint32x4_t wideMask = vdupq_n_u32(0xFFFFFFFF);
while (width >= 4) {
wideMask = vandq_u32(wideMask, vld1q_u32(src));
src += 4;
width -= 4;
}
mask = NEONReduce32And(wideMask);
}
#endif

for (int i = 0; i < width; i++) {
mask &= src[i];
}
*outMask &= (u32)mask;
}

CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, bool reverseColors, bool useBGRA, bool expandTo32bit) {
u32 alphaSum = 0xFFFFFFFF;
u32 fullAlphaMask = 0x0;
Expand Down
Loading