Skip to content

Commit

Permalink
fix VertexDecoder/SoftGpu on big-endian.
Browse files Browse the repository at this point in the history
  • Loading branch information
aliaspider committed May 6, 2018
1 parent 0188d5c commit e98e9f7
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 98 deletions.
57 changes: 25 additions & 32 deletions Common/ColorConv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,13 +322,12 @@ void ConvertRGBA565ToRGBA8888(u32 *dst32, const u16 *src, u32 numPixels) {
u32 i = 0;
#endif

u8 *dst = (u8 *)dst32;
for (u32 x = i; x < numPixels; x++) {
u16 col = src[x];
dst[x * 4] = Convert5To8((col) & 0x1f);
dst[x * 4 + 1] = Convert6To8((col >> 5) & 0x3f);
dst[x * 4 + 2] = Convert5To8((col >> 11) & 0x1f);
dst[x * 4 + 3] = 255;
dst32[x] = Convert5To8((col) & 0x1f);
dst32[x] |= Convert6To8((col >> 5) & 0x3f) << 8;
dst32[x] |= Convert5To8((col >> 11) & 0x1f) << 16;
dst32[x] |= 255 << 24;
}
}

Expand Down Expand Up @@ -376,13 +375,12 @@ void ConvertRGBA5551ToRGBA8888(u32 *dst32, const u16 *src, u32 numPixels) {
u32 i = 0;
#endif

u8 *dst = (u8 *)dst32;
for (u32 x = i; x < numPixels; x++) {
u16 col = src[x];
dst[x * 4] = Convert5To8((col) & 0x1f);
dst[x * 4 + 1] = Convert5To8((col >> 5) & 0x1f);
dst[x * 4 + 2] = Convert5To8((col >> 10) & 0x1f);
dst[x * 4 + 3] = (col >> 15) ? 255 : 0;
dst32[x] = Convert5To8((col) & 0x1f);
dst32[x] |= Convert5To8((col >> 5) & 0x1f) << 8;
dst32[x] |= Convert5To8((col >> 10) & 0x1f) << 16;
dst32[x] |= (col >> 15) ? 255 << 24 : 0;
}
}

Expand Down Expand Up @@ -425,51 +423,46 @@ void ConvertRGBA4444ToRGBA8888(u32 *dst32, const u16 *src, u32 numPixels) {
u32 i = 0;
#endif

u8 *dst = (u8 *)dst32;
for (u32 x = i; x < numPixels; x++) {
u16 col = src[x];
dst[x * 4] = Convert4To8(col & 0xf);
dst[x * 4 + 1] = Convert4To8((col >> 4) & 0xf);
dst[x * 4 + 2] = Convert4To8((col >> 8) & 0xf);
dst[x * 4 + 3] = Convert4To8(col >> 12);
dst32[x] = Convert4To8(col & 0xf);
dst32[x] |= Convert4To8((col >> 4) & 0xf) << 8;
dst32[x] |= Convert4To8((col >> 8) & 0xf) << 16;
dst32[x] |= Convert4To8(col >> 12) << 24;
}
}

// Expands numPixels 16-bit 5:6:5 pixels from src into 32-bit pixels in dst32.
// Source bits 11-15 end up in output bits 0-7, source bits 5-10 in output
// bits 8-15, source bits 0-4 in output bits 16-23, and output bits 24-31 are
// always 0xFF.
//
// Each pixel is assembled as a u32 value and stored once, instead of being
// written through a byte pointer, so the packed value does not depend on the
// host byte order.
void ConvertABGR565ToRGBA8888(u32 *dst32, const u16 *src, u32 numPixels) {
	for (u32 x = 0; x < numPixels; x++) {
		const u16 col = src[x];
		// Widen to u32 before shifting so no shift is performed on a signed int.
		u32 rgba = Convert5To8((col >> 11) & 0x1f);
		rgba |= (u32)Convert6To8((col >> 5) & 0x3f) << 8;
		rgba |= (u32)Convert5To8(col & 0x1f) << 16;
		rgba |= 0xFF000000;
		dst32[x] = rgba;
	}
}

// Expands numPixels 16-bit 5:5:5:1 pixels from src into 32-bit pixels in dst32.
// Source bits 11-15 end up in output bits 0-7, source bits 6-10 in output
// bits 8-15, source bits 1-5 in output bits 16-23. Output bits 24-31 are 0xFF
// when source bit 0 is set and 0 otherwise.
//
// Each pixel is assembled as a u32 value and stored once, instead of being
// written through a byte pointer, so the packed value does not depend on the
// host byte order.
void ConvertABGR1555ToRGBA8888(u32 *dst32, const u16 *src, u32 numPixels) {
	for (u32 x = 0; x < numPixels; x++) {
		const u16 col = src[x];
		// Widen to u32 before shifting so no shift is performed on a signed int.
		u32 rgba = Convert5To8((col >> 11) & 0x1f);
		rgba |= (u32)Convert5To8((col >> 6) & 0x1f) << 8;
		rgba |= (u32)Convert5To8((col >> 1) & 0x1f) << 16;
		if (col & 1)
			rgba |= 0xFF000000;
		dst32[x] = rgba;
	}
}

// Expands numPixels 16-bit 4:4:4:4 pixels from src into 32-bit pixels in dst32.
// The source nibbles are reversed in order: bits 12-15 end up in output
// bits 0-7, bits 8-11 in output bits 8-15, bits 4-7 in output bits 16-23 and
// bits 0-3 in output bits 24-31.
//
// Each pixel is assembled as a u32 value and stored once, instead of being
// written through a byte pointer, so the packed value does not depend on the
// host byte order.
void ConvertABGR4444ToRGBA8888(u32 *dst32, const u16 *src, u32 numPixels) {
	for (u32 x = 0; x < numPixels; x++) {
		const u16 col = src[x];
		// Widen to u32 before shifting so no shift is performed on a signed int.
		u32 rgba = Convert4To8(col >> 12);
		rgba |= (u32)Convert4To8((col >> 8) & 0xf) << 8;
		rgba |= (u32)Convert4To8((col >> 4) & 0xf) << 16;
		rgba |= (u32)Convert4To8(col & 0xf) << 24;
		dst32[x] = rgba;
	}
}

void ConvertRGBA4444ToBGRA8888(u32 *dst32, const u16 *src, u32 numPixels) {
u8 *dst = (u8 *)dst32;
void ConvertRGBA4444ToBGRA8888(u32 *dst, const u16 *src, u32 numPixels) {
for (u32 x = 0; x < numPixels; x++) {
u16 c = src[x];
u32 r = c & 0x000f;
Expand Down
54 changes: 30 additions & 24 deletions GPU/Common/VertexDecoderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBo
lowerBound = value;
}
} else if (idx == GE_VTYPE_IDX_16BIT) {
const u16 *ind16 = (const u16 *)inds;
const u16_le *ind16 = (const u16_le *)inds;
for (int i = 0; i < count; i++) {
u16 value = ind16[i];
if (value > upperBound)
Expand All @@ -127,7 +127,7 @@ void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBo
}
} else if (idx == GE_VTYPE_IDX_32BIT) {
WARN_LOG_REPORT_ONCE(indexBounds32, G3D, "GetIndexBounds: Decoding 32-bit indexes");
const u32 *ind32 = (const u32 *)inds;
const u32_le *ind32 = (const u32_le *)inds;
for (int i = 0; i < count; i++) {
u16 value = (u16)ind32[i];
// These aren't documented and should be rare. Let's bounds check each one.
Expand Down Expand Up @@ -496,41 +496,42 @@ void VertexDecoder::Step_ColorInvalid() const

void VertexDecoder::Step_Color565() const
{
u8 *c = decoded_ + decFmt.c0off;
u32 *c = (u32*)(decoded_ + decFmt.c0off);
u16 cdata = *(u16_le *)(ptr_ + coloff);
c[0] = Convert5To8(cdata & 0x1f);
c[1] = Convert6To8((cdata >> 5) & 0x3f);
c[2] = Convert5To8((cdata >> 11) & 0x1f);
c[3] = 255;
*c = Convert5To8(cdata & 0x1f);
*c |= Convert6To8((cdata >> 5) & 0x3f) << 8;
*c |= Convert5To8((cdata >> 11) & 0x1f) << 16;
*c |= 255 << 24;
// Always full alpha.
}

void VertexDecoder::Step_Color5551() const
{
u8 *c = decoded_ + decFmt.c0off;
u32 *c = (u32*)(decoded_ + decFmt.c0off);
u16 cdata = *(u16_le *)(ptr_ + coloff);
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (cdata >> 15) != 0;
c[0] = Convert5To8(cdata & 0x1f);
c[1] = Convert5To8((cdata >> 5) & 0x1f);
c[2] = Convert5To8((cdata >> 10) & 0x1f);
c[3] = (cdata >> 15) ? 255 : 0;
*c = Convert5To8(cdata & 0x1f);
*c |= Convert5To8((cdata >> 5) & 0x1f) << 8;
*c |= Convert5To8((cdata >> 10) & 0x1f) << 16;
*c |= (cdata >> 15) ? 255 << 24 : 0;
}

void VertexDecoder::Step_Color4444() const
{
u8 *c = decoded_ + decFmt.c0off;
u32 *c = (u32*)(decoded_ + decFmt.c0off);
u16 cdata = *(u16_le *)(ptr_ + coloff);
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (cdata >> 12) == 0xF;
*c = 0;
for (int j = 0; j < 4; j++)
c[j] = Convert4To8((cdata >> (j * 4)) & 0xF);
*c |= Convert4To8((cdata >> (j * 4)) & 0xF) << (j * 8);
}

void VertexDecoder::Step_Color8888() const
{
u8 *c = decoded_ + decFmt.c0off;
const u8 *cdata = (const u8*)(ptr_ + coloff);
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && cdata[3] == 255;
memcpy(c, cdata, sizeof(u8) * 4);
u32 *c = (u32*)(decoded_ + decFmt.c0off);
u32 cdata = *(u32_le*)(ptr_ + coloff);
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (cdata >> 24) == 0xFF;
*c = cdata;
}

void VertexDecoder::Step_Color565Morph() const
Expand Down Expand Up @@ -750,9 +751,10 @@ void VertexDecoder::Step_PosS16() const

// Copies three position floats from ptr_ + posoff to decoded_ + decFmt.posoff.
//
// Each component is read through float_le (presumably a little-endian wrapper
// type that converts on read - confirm against its definition) and assigned
// individually, rather than copying 12 raw bytes, so any conversion the
// wrapper performs is applied per element.
void VertexDecoder::Step_PosFloat() const
{
	float *pos = (float *)(decoded_ + decFmt.posoff);
	const float_le *fv = (const float_le *)(ptr_ + posoff);
	for (int j = 0; j < 3; j++)
		pos[j] = fv[j];
}

void VertexDecoder::Step_PosS8Skin() const
Expand Down Expand Up @@ -800,9 +802,11 @@ void VertexDecoder::Step_PosS16Through() const

void VertexDecoder::Step_PosFloatThrough() const
{
u8 *v = (u8 *)(decoded_ + decFmt.posoff);
const u8 *fv = (const u8 *)(ptr_ + posoff);
memcpy(v, fv, 12);
float *v = (float *)(decoded_ + decFmt.posoff);
const float_le *fv = (const float_le*)(ptr_ + posoff);
v[0] = fv[0];
v[1] = fv[1];
v[2] = fv[2];
}

void VertexDecoder::Step_PosS8Morph() const
Expand Down Expand Up @@ -1355,6 +1359,8 @@ std::string VertexDecoder::GetString(DebugShaderStringType stringType) {
lines = DisassembleArm2((const u8 *)jitted_, jittedSize_);
#elif defined(MIPS)
// No MIPS disassembler defined
#elif defined(__PPC__)
// No PPC disassembler defined
#else
lines = DisassembleX86((const u8 *)jitted_, jittedSize_);
#endif
Expand Down
6 changes: 3 additions & 3 deletions GPU/GPUCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -973,7 +973,7 @@ void GPUCommon::FastRunLoop(DisplayList &list) {
int dc = downcount;
for (; dc > 0; --dc) {
// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
const u32 op = *(const u32 *)(Memory::base + list.pc);
const u32 op = Memory::ReadUnchecked_U32(list.pc);
const u32 cmd = op >> 24;
const CommandInfo &info = cmdInfo[cmd];
const u32 diff = op ^ gstate.cmdmem[cmd];
Expand Down Expand Up @@ -2151,8 +2151,8 @@ void GPUCommon::FlushImm() {
// through vertices.
// Since the only known use is Thrillville and it only uses it to clear, we just use color and pos.
struct ImmVertex {
uint32_t color;
float xyz[3];
u32_le color;
float_le xyz[3];
};
ImmVertex temp[MAX_IMMBUFFER_SIZE];
for (int i = 0; i < immCount_; i++) {
Expand Down
8 changes: 4 additions & 4 deletions GPU/Software/Rasterizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1503,7 +1503,7 @@ void ClearRectangle(const VertexData &v0, const VertexData &v1)
DrawingCoords p = TransformUnit::ScreenToDrawing(pprime);

if ((z & 0xFF) == (z >> 8)) {
u16 *row = &depthbuf.as16[p.x + p.y * stride];
void *row = &depthbuf.as16[p.x + p.y * stride];
memset(row, z, w * 2);
} else {
for (int x = 0; x < w; ++x) {
Expand Down Expand Up @@ -1560,8 +1560,8 @@ void ClearRectangle(const VertexData &v0, const VertexData &v1)
if (gstate.FrameBufFormat() == GE_FORMAT_8888) {
for (pprime.y = minY; pprime.y < maxY; pprime.y += 16) {
DrawingCoords p = TransformUnit::ScreenToDrawing(pprime);
if ((new_color & 0xFF) == (new_color >> 8) && (new_color & 0xFFFF) == (new_color >> 16)) {
u32 *row = &fb.as32[p.x + p.y * stride];
if ((new_color & 0xFF) == (u8)(new_color >> 8) && (new_color & 0xFFFF) == (new_color >> 16)) {
void *row = &fb.as32[p.x + p.y * stride];
memset(row, new_color, w * 4);
} else {
for (int x = 0; x < w; ++x) {
Expand All @@ -1573,7 +1573,7 @@ void ClearRectangle(const VertexData &v0, const VertexData &v1)
for (pprime.y = minY; pprime.y < maxY; pprime.y += 16) {
DrawingCoords p = TransformUnit::ScreenToDrawing(pprime);
if ((new_color16 & 0xFF) == (new_color16 >> 8)) {
u16 *row = &fb.as16[p.x + p.y * stride];
void *row = &fb.as16[p.x + p.y * stride];
memset(row, new_color16, w * 2);
} else {
for (int x = 0; x < w; ++x) {
Expand Down
20 changes: 10 additions & 10 deletions GPU/Software/Sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,16 +277,16 @@ static inline u32 LookupColor(unsigned int index, unsigned int level)

switch (gstate.getClutPaletteFormat()) {
case GE_CMODE_16BIT_BGR5650:
return RGB565ToRGBA8888(reinterpret_cast<u16*>(clut)[index + clutSharingOffset]);
return RGB565ToRGBA8888(reinterpret_cast<u16_le*>(clut)[index + clutSharingOffset]);

case GE_CMODE_16BIT_ABGR5551:
return RGBA5551ToRGBA8888(reinterpret_cast<u16*>(clut)[index + clutSharingOffset]);
return RGBA5551ToRGBA8888(reinterpret_cast<u16_le*>(clut)[index + clutSharingOffset]);

case GE_CMODE_16BIT_ABGR4444:
return RGBA4444ToRGBA8888(reinterpret_cast<u16*>(clut)[index + clutSharingOffset]);
return RGBA4444ToRGBA8888(reinterpret_cast<u16_le*>(clut)[index + clutSharingOffset]);

case GE_CMODE_32BIT_ABGR8888:
return clut[index + clutSharingOffset];
return reinterpret_cast<u32_le*>(clut)[index + clutSharingOffset];

default:
ERROR_LOG_REPORT(G3D, "Software: Unsupported palette format: %x", gstate.getClutPaletteFormat());
Expand Down Expand Up @@ -319,43 +319,43 @@ inline static Nearest4 SampleNearest(int u[N], int v[N], const u8 *srcptr, int t
case GE_TFMT_4444:
for (int i = 0; i < N; ++i) {
const u8 *src = srcptr + GetPixelDataOffset<16>(texbufw, u[i], v[i]);
res.v[i] = RGBA4444ToRGBA8888(*(const u16 *)src);
res.v[i] = RGBA4444ToRGBA8888(*(const u16_le *)src);
}
return res;

case GE_TFMT_5551:
for (int i = 0; i < N; ++i) {
const u8 *src = srcptr + GetPixelDataOffset<16>(texbufw, u[i], v[i]);
res.v[i] = RGBA5551ToRGBA8888(*(const u16 *)src);
res.v[i] = RGBA5551ToRGBA8888(*(const u16_le *)src);
}
return res;

case GE_TFMT_5650:
for (int i = 0; i < N; ++i) {
const u8 *src = srcptr + GetPixelDataOffset<16>(texbufw, u[i], v[i]);
res.v[i] = RGB565ToRGBA8888(*(const u16 *)src);
res.v[i] = RGB565ToRGBA8888(*(const u16_le *)src);
}
return res;

case GE_TFMT_8888:
for (int i = 0; i < N; ++i) {
const u8 *src = srcptr + GetPixelDataOffset<32>(texbufw, u[i], v[i]);
res.v[i] = *(const u32 *)src;
res.v[i] = *(const u32_le *)src;
}
return res;

case GE_TFMT_CLUT32:
for (int i = 0; i < N; ++i) {
const u8 *src = srcptr + GetPixelDataOffset<32>(texbufw, u[i], v[i]);
u32 val = src[0] + (src[1] << 8) + (src[2] << 16) + (src[3] << 24);
u32 val = *(u32_le *)src;
res.v[i] = LookupColor(gstate.transformClutIndex(val), 0);
}
return res;

case GE_TFMT_CLUT16:
for (int i = 0; i < N; ++i) {
const u8 *src = srcptr + GetPixelDataOffset<16>(texbufw, u[i], v[i]);
u16 val = src[0] + (src[1] << 8);
u16 val = *(u16_le *)src;
res.v[i] = LookupColor(gstate.transformClutIndex(val), 0);
}
return res;
Expand Down
Loading

0 comments on commit e98e9f7

Please sign in to comment.