Skip to content

Commit

Permalink
sln: remove SSE4 from build options, make SSE4 default
Browse files Browse the repository at this point in the history
  • Loading branch information
GovanifY committed Mar 27, 2021
1 parent 1c88a2d commit f87d06c
Show file tree
Hide file tree
Showing 22 changed files with 173 additions and 616 deletions.
90 changes: 0 additions & 90 deletions PCSX2_suite.sln

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions buildbot.xml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
</PropertyGroup>
<ItemGroup>
<ConfigCPU Include="Debug"/>
<!--ConfigCPU Include="Debug AVX2;Debug SSE4"/-->
<!--ConfigCPU Include="Debug AVX2;Debug"/-->
</ItemGroup>
</Target>
<Target Name="DevelAll">
Expand All @@ -62,15 +62,15 @@
</PropertyGroup>
<ItemGroup>
<ConfigCPU Include="Devel"/>
<!--ConfigCPU Include="Devel AVX2;Devel SSE4"/-->
<!--ConfigCPU Include="Devel AVX2;Devel"/-->
</ItemGroup>
</Target>
<Target Name="ReleaseAll">
<PropertyGroup>
<BaseConfiguration>Release</BaseConfiguration>
</PropertyGroup>
<ItemGroup>
<ConfigCPU Include="Release AVX2;Release SSE4"/>
<ConfigCPU Include="Release AVX2;Release"/>
</ItemGroup>
</Target>
</Project>
14 changes: 0 additions & 14 deletions pcsx2/SPU2/spu2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,20 +50,6 @@ u32 lClocks = 0;
// Reports whether the host CPU is acceptable for the SPU2 code.
// No runtime CPU feature detection is performed here: the function
// unconditionally returns true. The former SSE/SSE2 probe was placed after
// the return statement inside an `#if 0` region, so it was never compiled or
// executed; that dead code has been removed.
static bool CheckSSE()
{
	return true;
}

void SPU2configure()
Expand Down
10 changes: 5 additions & 5 deletions pcsx2/gui/AppInit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ void Pcsx2App::DetectCpuAndUserMode()
x86caps.CountCores();
x86caps.SIMD_EstablishMXCSRmask();

if (!x86caps.hasStreamingSIMD2Extensions)
if (!x86caps.hasStreamingSIMD4Extensions)
{
// This code will probably never run if the binary was correctly compiled for SSE2
// SSE2 is required for any decent speed and is supported by more than decade old x86 CPUs
// This code will probably never run if the binary was correctly compiled for SSE4
// SSE4 is required for any decent speed and is supported by more than decade old x86 CPUs
throw Exception::HardwareDeficiency()
.SetDiagMsg(L"Critical Failure: SSE2 Extensions not available.")
.SetUserMsg(_("SSE2 extensions are not available. PCSX2 requires a cpu that supports the SSE2 instruction set."));
.SetDiagMsg(L"Critical Failure: SSE4 Extensions not available.")
.SetUserMsg(_("SSE4 extensions are not available. PCSX2 requires a cpu that supports the SSE4 instruction set."));
}
#endif

Expand Down
3 changes: 0 additions & 3 deletions pcsx2/gui/Panels/PluginSelectorPanel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -684,20 +684,17 @@ void Panels::PluginSelectorPanel::OnEnumComplete( wxCommandEvent& evt )

int index_avx2 = -1;
int index_sse4 = -1;
int index_sse2 = -1;

for( int i = 0; i < count; i++ )
{
auto str = m_ComponentBoxes->Get(pid).GetString( i );

if( x86caps.hasAVX2 && str.Contains("AVX2") ) index_avx2 = i;
if( x86caps.hasStreamingSIMD4Extensions && str.Contains("SSE4") ) index_sse4 = i;
if( str.Contains("SSE2") ) index_sse2 = i;
}

if( index_avx2 >= 0 ) m_ComponentBoxes->Get(pid).SetSelection( index_avx2 );
else if( index_sse4 >= 0 ) m_ComponentBoxes->Get(pid).SetSelection( index_sse4 );
else if( index_sse2 >= 0 ) m_ComponentBoxes->Get(pid).SetSelection( index_sse2 );
else m_ComponentBoxes->Get(pid).SetSelection( 0 );
}
else
Expand Down
212 changes: 25 additions & 187 deletions pcsx2/x86/iMMI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,17 +215,9 @@ void recPMTHL()

int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI );

if ( x86caps.hasStreamingSIMD4Extensions ) {
xBLEND.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S), 0x5);
xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S), 0xdd);
xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0x72);
}
else {
xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S), 0x8d);
xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S), 0xdd);
xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0x72);
xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0x72);
}
xBLEND.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S), 0x5);
xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S), 0xdd);
xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0x72);

_clearNeededXMMregs();
}
Expand Down Expand Up @@ -400,47 +392,12 @@ void recPMAXW()
EE::Profiler.EmitOp(eeOpcode::PMAXW);

int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if ( x86caps.hasStreamingSIMD4Extensions ) {
if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
else if( EEREC_D == EEREC_S ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if ( EEREC_D == EEREC_T ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
else {
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
}
}
if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
else if( EEREC_D == EEREC_S ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if ( EEREC_D == EEREC_T ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
else {
int t0reg;

if( EEREC_S == EEREC_T ) {
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
}
else {
t0reg = _allocTempXMMreg(XMMT_INT, -1);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
xPCMP.GTD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));

if( EEREC_D == EEREC_S ) {
xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
}
else if( EEREC_D == EEREC_T ) {
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
xPANDN(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
_freeXMMreg(t1reg);
}
else {
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
}

xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
_freeXMMreg(t0reg);
}
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
}
_clearNeededXMMregs();
}
Expand Down Expand Up @@ -1173,18 +1130,7 @@ void recPABSW() //needs clamping
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSLL.D(xRegisterSSE(t0reg), 31);
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); //0xffffffff if equal to 0x80000000
if( x86caps.hasSupplementalStreamingSIMD3Extensions ) {
xPABS.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x80000000 -> 0x80000000
}
else {
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
xPSRA.D(xRegisterSSE(t1reg), 31);
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); //0x80000000 -> 0x80000000
_freeXMMreg(t1reg);
}
xPABS.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x80000000 -> 0x80000000
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); //0x80000000 -> 0x7fffffff
_freeXMMreg(t0reg);
_clearNeededXMMregs();
Expand All @@ -1203,18 +1149,7 @@ void recPABSH()
xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSLL.W(xRegisterSSE(t0reg), 15);
xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); //0xffff if equal to 0x8000
if( x86caps.hasSupplementalStreamingSIMD3Extensions ) {
xPABS.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x8000 -> 0x8000
}
else {
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
xPSRA.W(xRegisterSSE(t1reg), 15);
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); //0x8000 -> 0x8000
_freeXMMreg(t1reg);
}
xPABS.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x8000 -> 0x8000
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); //0x8000 -> 0x7fff
_freeXMMreg(t0reg);
_clearNeededXMMregs();
Expand All @@ -1228,47 +1163,12 @@ void recPMINW()
EE::Profiler.EmitOp(eeOpcode::PMINW);

int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if ( x86caps.hasStreamingSIMD4Extensions ) {
if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
else if( EEREC_D == EEREC_S ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if ( EEREC_D == EEREC_T ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
else {
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
}
}
if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
else if( EEREC_D == EEREC_S ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if ( EEREC_D == EEREC_T ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
else {
int t0reg;

if( EEREC_S == EEREC_T ) {
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
}
else {
t0reg = _allocTempXMMreg(XMMT_INT, -1);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
xPCMP.GTD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));

if( EEREC_D == EEREC_S ) {
xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
}
else if( EEREC_D == EEREC_T ) {
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
xPANDN(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
_freeXMMreg(t1reg);
}
else {
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
}

xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
_freeXMMreg(t0reg);
}
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
}
_clearNeededXMMregs();
}
Expand Down Expand Up @@ -1718,12 +1618,6 @@ void recPMADDW()
{
EE::Profiler.EmitOp(eeOpcode::PMADDW);

if( !x86caps.hasStreamingSIMD4Extensions ) {
_deleteEEreg(_Rd_, 0);
recCall(Interp::PMADDW);
return;
}

int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI );
xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88);
xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]}
Expand Down Expand Up @@ -1775,18 +1669,8 @@ void recPSLLVW()
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
else {
if ( x86caps.hasStreamingSIMD4Extensions ) {
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
else {
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D));
xPSRA.D(xRegisterSSE(t0reg), 31);
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
_freeXMMreg(t0reg);
}
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
}
else if( _Rt_ == 0 ) {
Expand All @@ -1813,16 +1697,8 @@ void recPSLLVW()
xPSLL.D(xRegisterSSE(t1reg), xRegisterSSE(t0reg));

// merge & sign extend
if ( x86caps.hasStreamingSIMD4Extensions ) {
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
else {
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D));
xPSRA.D(xRegisterSSE(t0reg), 31); // get the signs
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
}
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));

_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
Expand All @@ -1843,18 +1719,8 @@ void recPSRLVW()
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
else {
if ( x86caps.hasStreamingSIMD4Extensions ) {
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
else {
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D));
xPSRA.D(xRegisterSSE(t0reg), 31);
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
_freeXMMreg(t0reg);
}
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
}
else if( _Rt_ == 0 ) {
Expand All @@ -1881,16 +1747,8 @@ void recPSRLVW()
xPSRL.D(xRegisterSSE(t1reg), xRegisterSSE(t0reg));

// merge & sign extend
if ( x86caps.hasStreamingSIMD4Extensions ) {
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
else {
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D));
xPSRA.D(xRegisterSSE(t0reg), 31); // get the signs
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
}
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));

_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
Expand All @@ -1903,11 +1761,6 @@ void recPMSUBW()
{
EE::Profiler.EmitOp(eeOpcode::PMSUBW);

if( !x86caps.hasStreamingSIMD4Extensions ) {
_deleteEEreg(_Rd_, 0);
recCall(Interp::PMSUBW);
return;
}
int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI );
xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88);
xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]}
Expand Down Expand Up @@ -1957,11 +1810,6 @@ void recPMULTW()
{
EE::Profiler.EmitOp(eeOpcode::PMULTW);

if( !x86caps.hasStreamingSIMD4Extensions ) {
_deleteEEreg(_Rd_, 0);
recCall(Interp::PMULTW);
return;
}
int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI );
if( !_Rs_ || !_Rt_ ) {
if( _Rd_ ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
Expand Down Expand Up @@ -2455,18 +2303,8 @@ void recPSRAVW()
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
else {
if ( x86caps.hasStreamingSIMD4Extensions ) {
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
else {
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D));
xPSRA.D(xRegisterSSE(t0reg), 31);
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
_freeXMMreg(t0reg);
}
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
}
else if( _Rt_ == 0 ) {
Expand Down
Loading

0 comments on commit f87d06c

Please sign in to comment.