Skip to content

Commit

Permalink
sln: remove SSE4 from build options, make SSE4 default
Browse files Browse the repository at this point in the history
  • Loading branch information
GovanifY committed Mar 26, 2021
1 parent 1c88a2d commit 1a46a43
Show file tree
Hide file tree
Showing 10 changed files with 108 additions and 189 deletions.
90 changes: 0 additions & 90 deletions PCSX2_suite.sln

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions buildbot.xml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
</PropertyGroup>
<ItemGroup>
<ConfigCPU Include="Debug"/>
<!--ConfigCPU Include="Debug AVX2;Debug SSE4"/-->
<!--ConfigCPU Include="Debug AVX2;Debug"/-->
</ItemGroup>
</Target>
<Target Name="DevelAll">
Expand All @@ -62,15 +62,15 @@
</PropertyGroup>
<ItemGroup>
<ConfigCPU Include="Devel"/>
<!--ConfigCPU Include="Devel AVX2;Devel SSE4"/-->
<!--ConfigCPU Include="Devel AVX2;Devel"/-->
</ItemGroup>
</Target>
<Target Name="ReleaseAll">
<PropertyGroup>
<BaseConfiguration>Release</BaseConfiguration>
</PropertyGroup>
<ItemGroup>
<ConfigCPU Include="Release AVX2;Release SSE4"/>
<ConfigCPU Include="Release AVX2;Release"/>
</ItemGroup>
</Target>
</Project>
31 changes: 31 additions & 0 deletions plugins/GSdx/GSBlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,35 @@ class GSBlock

template<int i> __forceinline static void ReadColumn8(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{

//for(int j = 0; j < 64; j++) ((uint8*)src)[j] = (uint8)j;

#if 0 //_M_SSE >= 0x501

const GSVector8i* s = (const GSVector8i*)src;

GSVector8i v0 = s[i * 2 + 0];
GSVector8i v1 = s[i * 2 + 1];

GSVector8i::sw8(v0, v1);
GSVector8i::sw16(v0, v1);
GSVector8i::sw8(v0, v1);
GSVector8i::sw128(v0, v1);
GSVector8i::sw16(v0, v1);

v0 = v0.acbd();
v1 = v1.acbd();
v1 = v1.yxwz();

GSVector8i::storel(&dst[dstpitch * 0], v0);
GSVector8i::storeh(&dst[dstpitch * 1], v0);
GSVector8i::storel(&dst[dstpitch * 2], v1);
GSVector8i::storeh(&dst[dstpitch * 3], v1);

// TODO: not sure if this is worth it, not in this form, there should be a shorter path

#else

const GSVector4i* s = (const GSVector4i*)src;

GSVector4i v0, v1, v2, v3;
Expand Down Expand Up @@ -550,6 +579,8 @@ class GSBlock
GSVector4i::store<true>(&dst[dstpitch * 1], v3);
GSVector4i::store<true>(&dst[dstpitch * 2], v1);
GSVector4i::store<true>(&dst[dstpitch * 3], v2);

#endif
}

template<int i> __forceinline static void ReadColumn4(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
Expand Down
4 changes: 0 additions & 4 deletions plugins/GSdx/GSState.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2734,11 +2734,7 @@ __forceinline void GSState::VertexKick(uint32 skip)

GSVector4i xy = v1.xxxx().u16to32().sub32(m_ofxy);

#if _M_SSE >= 0x401
GSVector4i::storel(&m_vertex.xy[xy_tail & 3], xy.blend16<0xf0>(xy.sra32(4)).ps32());
#else
GSVector4i::storel(&m_vertex.xy[xy_tail & 3], xy.upl64(xy.sra32(4).zwzw()).ps32());
#endif

m_vertex.tail = ++tail;
m_vertex.xy_tail = ++xy_tail;
Expand Down
10 changes: 0 additions & 10 deletions plugins/GSdx/GSUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,6 @@ const char* GSUtil::GetLibName()
"AVX", sw_sse
#elif _M_SSE >= 0x401
"SSE4.1", sw_sse
#elif _M_SSE >= 0x301
"SSSE3", sw_sse
#elif _M_SSE >= 0x200
"SSE2", sw_sse
#endif
);

Expand Down Expand Up @@ -221,13 +217,7 @@ bool GSUtil::CheckSSE()
};

ISA checks[] = {
{Xbyak::util::Cpu::tSSE2, "SSE2"},
#if _M_SSE >= 0x301
{Xbyak::util::Cpu::tSSSE3, "SSSE3"},
#endif
#if _M_SSE >= 0x401
{Xbyak::util::Cpu::tSSE41, "SSE41"},
#endif
#if _M_SSE >= 0x500
{Xbyak::util::Cpu::tAVX, "AVX1"},
#endif
Expand Down
57 changes: 47 additions & 10 deletions plugins/GSdx/GSVector4.h
Original file line number Diff line number Diff line change
Expand Up @@ -316,22 +316,54 @@ class alignas(16) GSVector4

__forceinline GSVector4 madd(const GSVector4& a, const GSVector4& b) const
{
return *this * a + b;
#if 0//_M_SSE >= 0x501

return GSVector4(_mm_fmadd_ps(m, a, b));

#else

return *this * a + b;

#endif
}

__forceinline GSVector4 msub(const GSVector4& a, const GSVector4& b) const
{
return *this * a - b;
#if 0//_M_SSE >= 0x501

return GSVector4(_mm_fmsub_ps(m, a, b));

#else

return *this * a - b;

#endif
}

__forceinline GSVector4 nmadd(const GSVector4& a, const GSVector4& b) const
{
return b - *this * a;
#if 0//_M_SSE >= 0x501

return GSVector4(_mm_fnmadd_ps(m, a, b));

#else

return b - *this * a;

#endif
}

__forceinline GSVector4 nmsub(const GSVector4& a, const GSVector4& b) const
{
return -b - *this * a;
#if 0//_M_SSE >= 0x501

return GSVector4(_mm_fnmsub_ps(m, a, b));

#else

return -b - *this * a;

#endif
}

__forceinline GSVector4 addm(const GSVector4& a, const GSVector4& b) const
Expand Down Expand Up @@ -460,16 +492,10 @@ class alignas(16) GSVector4

return _mm_testz_ps(m, m) != 0;

#elif _M_SSE >= 0x401

__m128i a = _mm_castps_si128(m);

return _mm_testz_si128(a, a) != 0;

#else

return mask() == 0;

#endif
}

Expand All @@ -482,6 +508,15 @@ class alignas(16) GSVector4
{
// TODO: use blendps when src == dst


#if 0 // _M_SSE >= 0x401

// NOTE: it's faster with shuffles...

return GSVector4(_mm_insert_ps(m, v.m, _MM_MK_INSERTPS_NDX(src, dst, 0)));

#else

switch(dst)
{
case 0:
Expand Down Expand Up @@ -527,6 +562,8 @@ class alignas(16) GSVector4
default:
__assume(0);
}

#endif
}

#ifdef __linux__
Expand Down
30 changes: 27 additions & 3 deletions plugins/GSdx/GSVector4i.h
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,15 @@ class alignas(16) GSVector4i

__forceinline GSVector4i upl8() const
{
return GSVector4i(_mm_unpacklo_epi8(m, _mm_setzero_si128()));
#if 0 // _M_SSE >= 0x401 // TODO: compiler bug

return GSVector4i(_mm_cvtepu8_epi16(m));

#else

return GSVector4i(_mm_unpacklo_epi8(m, _mm_setzero_si128()));

#endif
}

__forceinline GSVector4i uph8() const
Expand All @@ -543,7 +551,15 @@ class alignas(16) GSVector4i

__forceinline GSVector4i upl16() const
{
return GSVector4i(_mm_unpacklo_epi16(m, _mm_setzero_si128()));
#if 0 //_M_SSE >= 0x401 // TODO: compiler bug

return GSVector4i(_mm_cvtepu16_epi32(m));

#else

return GSVector4i(_mm_unpacklo_epi16(m, _mm_setzero_si128()));

#endif
}

__forceinline GSVector4i uph16() const
Expand All @@ -553,7 +569,15 @@ class alignas(16) GSVector4i

__forceinline GSVector4i upl32() const
{
return GSVector4i(_mm_unpacklo_epi32(m, _mm_setzero_si128()));
#if 0 //_M_SSE >= 0x401 // TODO: compiler bug

return GSVector4i(_mm_cvtepu32_epi64(m));

#else

return GSVector4i(_mm_unpacklo_epi32(m, _mm_setzero_si128()));

#endif
}

__forceinline GSVector4i uph32() const
Expand Down
61 changes: 0 additions & 61 deletions plugins/GSdx/Renderers/Common/GSVertexTrace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,18 +175,9 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
GSVector4i cmin = GSVector4i::xffffffff();
GSVector4i cmax = GSVector4i::zero();

#if _M_SSE >= 0x401

GSVector4i pmin = GSVector4i::xffffffff();
GSVector4i pmax = GSVector4i::zero();

#else

GSVector4 pmin = s_minmax.xxxx();
GSVector4 pmax = s_minmax.yyyy();

#endif

const GSVertex* RESTRICT v = (GSVertex*)vertex;

for(int i = 0; i < count; i += n)
Expand Down Expand Up @@ -233,21 +224,10 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
GSVector4i xy = xyzf.upl16();
GSVector4i z = xyzf.yyyy();

#if _M_SSE >= 0x401

GSVector4i p = xy.blend16<0xf0>(z.uph32(xyzf));

pmin = pmin.min_u32(p);
pmax = pmax.max_u32(p);

#else

GSVector4 p = GSVector4(xy.upl64(z.srl32(1).upl32(xyzf.wwww())));

pmin = pmin.min(p);
pmax = pmax.max(p);

#endif
}
else if(primclass == GS_LINE_CLASS)
{
Expand Down Expand Up @@ -314,23 +294,11 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
GSVector4i xy1 = xyzf1.upl16();
GSVector4i z1 = xyzf1.yyyy();

#if _M_SSE >= 0x401

GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0));
GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));

pmin = pmin.min_u32(p0.min_u32(p1));
pmax = pmax.max_u32(p0.max_u32(p1));

#else

GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww())));
GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));

pmin = pmin.min(p0.min(p1));
pmax = pmax.max(p0.max(p1));

#endif
}
else if(primclass == GS_TRIANGLE_CLASS)
{
Expand Down Expand Up @@ -406,25 +374,12 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
GSVector4i xy2 = xyzf2.upl16();
GSVector4i z2 = xyzf2.yyyy();

#if _M_SSE >= 0x401

GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0));
GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));
GSVector4i p2 = xy2.blend16<0xf0>(z2.uph32(xyzf2));

pmin = pmin.min_u32(p2).min_u32(p0.min_u32(p1));
pmax = pmax.max_u32(p2).max_u32(p0.max_u32(p1));

#else

GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww())));
GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));
GSVector4 p2 = GSVector4(xy2.upl64(z2.srl32(1).upl32(xyzf2.wwww())));

pmin = pmin.min(p2).min(p0.min(p1));
pmax = pmax.max(p2).max(p0.max(p1));

#endif
}
else if(primclass == GS_SPRITE_CLASS)
{
Expand Down Expand Up @@ -491,23 +446,11 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
GSVector4i xy1 = xyzf1.upl16();
GSVector4i z1 = xyzf1.yyyy();

#if _M_SSE >= 0x401

GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf1));
GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));

pmin = pmin.min_u32(p0.min_u32(p1));
pmax = pmax.max_u32(p0.max_u32(p1));

#else

GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf1.wwww())));
GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));

pmin = pmin.min(p0.min(p1));
pmax = pmax.max(p0.max(p1));

#endif
}
}

Expand All @@ -516,13 +459,9 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
// be true if depth isn't constant but close enough. It also implies that
// pmin.z & 1 == 0 and pmax.z & 1 == 0

#if _M_SSE >= 0x401

pmin = pmin.blend16<0x30>(pmin.srl32(1));
pmax = pmax.blend16<0x30>(pmax.srl32(1));

#endif

GSVector4 o(context->XYOFFSET);
GSVector4 s(1.0f / 16, 1.0f / 16, 2.0f, 1.0f);

Expand Down
Loading

0 comments on commit 1a46a43

Please sign in to comment.