Skip to content

Commit

Permalink
Merge pull request #15482 from hrydgard/simplify-checkalpha
Browse files Browse the repository at this point in the history
Delete a lot of specialized alpha checking code.
  • Loading branch information
hrydgard authored Apr 15, 2022
2 parents 0624235 + 900ff64 commit 9b25093
Show file tree
Hide file tree
Showing 12 changed files with 203 additions and 559 deletions.
5 changes: 3 additions & 2 deletions Core/TextureReplacer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -839,6 +839,7 @@ void ReplacedTexture::PrepareData(int level) {

int w, h, f;
uint8_t *image;

if (LoadZIMPtr(&zim[0], zimSize, &w, &h, &f, &image)) {
if (w > info.w || h > info.h) {
ERROR_LOG(G3D, "Texture replacement changed since header read: %s", info.file.c_str());
Expand All @@ -857,7 +858,7 @@ void ReplacedTexture::PrepareData(int level) {
free(image);
}

CheckAlphaResult res = CheckAlphaRGBA8888Basic((u32 *)&out[0], info.w, w, h);
CheckAlphaResult res = CheckAlpha32Rect((u32 *)&out[0], info.w, w, h, 0xFF000000);
if (res == CHECKALPHA_ANY || level == 0) {
alphaStatus_ = ReplacedTextureAlpha(res);
}
Expand Down Expand Up @@ -897,7 +898,7 @@ void ReplacedTexture::PrepareData(int level) {

if (!checkedAlpha) {
// This will only check the hashed bits.
CheckAlphaResult res = CheckAlphaRGBA8888Basic((u32 *)&out[0], info.w, png.width, png.height);
CheckAlphaResult res = CheckAlpha32Rect((u32 *)&out[0], info.w, png.width, png.height, 0xFF000000);
if (res == CHECKALPHA_ANY || level == 0) {
alphaStatus_ = ReplacedTextureAlpha(res);
}
Expand Down
112 changes: 0 additions & 112 deletions GPU/Common/TextureCacheCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1430,118 +1430,6 @@ inline u32 TfmtRawToFullAlpha(GETextureFormat fmt) {
return 0;
}

#ifdef _M_SSE
inline u32 SSEReduce32And(__m128i value) {
// TODO: Should use a shuffle instead of slri, probably.
value = _mm_and_si128(value, _mm_srli_si128(value, 64));
value = _mm_and_si128(value, _mm_srli_si128(value, 32));
return _mm_cvtsi128_si32(value);
}
inline u32 SSEReduce16And(__m128i value) {
// TODO: Should use a shuffle instead of slri, probably.
value = _mm_and_si128(value, _mm_srli_si128(value, 64));
value = _mm_and_si128(value, _mm_srli_si128(value, 32));
value = _mm_and_si128(value, _mm_srli_si128(value, 16));
return _mm_cvtsi128_si32(value);
}
#endif

#if PPSSPP_ARCH(ARM_NEON)
inline u32 NEONReduce32And(uint32x4_t value) {
// TODO: Maybe a shuffle and a vector and, or something?
return vgetq_lane_u32(value, 0) & vgetq_lane_u32(value, 1) & vgetq_lane_u32(value, 2) & vgetq_lane_u32(value, 3);
}
#endif

// TODO: SSE/SIMD
// At least on x86, compiler actually SIMDs these pretty well.
void CopyAndSumMask16(u16 *dst, const u16 *src, int width, u32 *outMask) {
u16 mask = 0xFFFF;
for (int i = 0; i < width; i++) {
u16 color = src[i];
mask &= color;
dst[i] = color;
}
*outMask &= (u32)mask;
}

// Used in video playback so nice to have being fast.
void CopyAndSumMask32(u32 *dst, const u32 *src, int width, u32 *outMask) {
u32 mask = 0xFFFFFFFF;
#ifdef _M_SSE
if (width >= 4) {
__m128i wideMask = _mm_set1_epi32(0xFFFFFFFF);
while (width >= 4) {
__m128i color = _mm_loadu_si128((__m128i *)src);
wideMask = _mm_and_si128(wideMask, color);
_mm_storeu_si128((__m128i *)dst, color);
src += 4;
dst += 4;
width -= 4;
}
mask = SSEReduce32And(wideMask);
}
#elif PPSSPP_ARCH(ARM_NEON)
if (width >= 4) {
uint32x4_t wideMask = vdupq_n_u32(0xFFFFFFFF);
while (width >= 4) {
uint32x4_t colors = vld1q_u32(src);
wideMask = vandq_u32(wideMask, colors);
vst1q_u32(dst, colors);
src += 4;
dst += 4;
width -= 4;
}
mask = NEONReduce32And(wideMask);
}
#endif

for (int i = 0; i < width; i++) {
u32 color = src[i];
mask &= color;
dst[i] = color;
}
*outMask &= (u32)mask;
}

void CheckMask16(const u16 *src, int width, u32 *outMask) {
u16 mask = 0xFFFF;
for (int i = 0; i < width; i++) {
mask &= src[i];
}
*outMask &= (u32)mask;
}

void CheckMask32(const u32 *src, int width, u32 *outMask) {
u32 mask = 0xFFFFFFFF;
#ifdef _M_SSE
if (width >= 4) {
__m128i wideMask = _mm_set1_epi32(0xFFFFFFFF);
while (width >= 4) {
wideMask = _mm_and_si128(wideMask, _mm_loadu_si128((__m128i *)src));
src += 4;
width -= 4;
}
mask = SSEReduce32And(wideMask);
}
#elif PPSSPP_ARCH(ARM_NEON)
if (width >= 4) {
uint32x4_t wideMask = vdupq_n_u32(0xFFFFFFFF);
while (width >= 4) {
wideMask = vandq_u32(wideMask, vld1q_u32(src));
src += 4;
width -= 4;
}
mask = NEONReduce32And(wideMask);
}
#endif

for (int i = 0; i < width; i++) {
mask &= src[i];
}
*outMask &= (u32)mask;
}

CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, bool reverseColors, bool useBGRA, bool expandTo32bit) {
u32 alphaSum = 0xFFFFFFFF;
u32 fullAlphaMask = 0x0;
Expand Down
Loading

0 comments on commit 9b25093

Please sign in to comment.