From d0b66d06ec0aa52290dede5b7c057cacbbeaa9b7 Mon Sep 17 00:00:00 2001 From: Gauvain 'GovanifY' Roussel-Tarbouriech Date: Sat, 27 Mar 2021 09:18:02 +0100 Subject: [PATCH] core: purge sse2 --- pcsx2/gui/AppInit.cpp | 10 +- pcsx2/gui/Panels/PluginSelectorPanel.cpp | 3 - pcsx2/x86/iMMI.cpp | 212 +++-------------------- pcsx2/x86/ix86-32/iR5900-32.cpp | 4 +- pcsx2/x86/microVU.cpp | 2 +- pcsx2/x86/microVU_Alloc.inl | 10 +- pcsx2/x86/microVU_Clamp.inl | 31 +--- pcsx2/x86/microVU_Lower.inl | 20 +-- pcsx2/x86/microVU_Misc.inl | 127 +++----------- pcsx2/x86/newVif_UnpackSSE.cpp | 113 +----------- 10 files changed, 71 insertions(+), 461 deletions(-) diff --git a/pcsx2/gui/AppInit.cpp b/pcsx2/gui/AppInit.cpp index 47f0b3da19075b..88727bea970228 100644 --- a/pcsx2/gui/AppInit.cpp +++ b/pcsx2/gui/AppInit.cpp @@ -46,13 +46,13 @@ void Pcsx2App::DetectCpuAndUserMode() x86caps.CountCores(); x86caps.SIMD_EstablishMXCSRmask(); - if (!x86caps.hasStreamingSIMD2Extensions) + if (!x86caps.hasStreamingSIMD4Extensions) { - // This code will probably never run if the binary was correctly compiled for SSE2 - // SSE2 is required for any decent speed and is supported by more than decade old x86 CPUs + // This code will probably never run if the binary was correctly compiled for SSE4 + // SSE4 is required for any decent speed and is supported by x86 CPUs that are more than a decade old throw Exception::HardwareDeficiency() - .SetDiagMsg(L"Critical Failure: SSE2 Extensions not available.") - .SetUserMsg(_("SSE2 extensions are not available. PCSX2 requires a cpu that supports the SSE2 instruction set.")); + .SetDiagMsg(L"Critical Failure: SSE4 Extensions not available.") + .SetUserMsg(_("SSE4 extensions are not available. 
PCSX2 requires a cpu that supports the SSE4 instruction set.")); } #endif diff --git a/pcsx2/gui/Panels/PluginSelectorPanel.cpp b/pcsx2/gui/Panels/PluginSelectorPanel.cpp index 6ef3d431d4072e..ff8152caf44d6e 100644 --- a/pcsx2/gui/Panels/PluginSelectorPanel.cpp +++ b/pcsx2/gui/Panels/PluginSelectorPanel.cpp @@ -684,7 +684,6 @@ void Panels::PluginSelectorPanel::OnEnumComplete( wxCommandEvent& evt ) int index_avx2 = -1; int index_sse4 = -1; - int index_sse2 = -1; for( int i = 0; i < count; i++ ) { @@ -692,12 +691,10 @@ void Panels::PluginSelectorPanel::OnEnumComplete( wxCommandEvent& evt ) if( x86caps.hasAVX2 && str.Contains("AVX2") ) index_avx2 = i; if( x86caps.hasStreamingSIMD4Extensions && str.Contains("SSE4") ) index_sse4 = i; - if( str.Contains("SSE2") ) index_sse2 = i; } if( index_avx2 >= 0 ) m_ComponentBoxes->Get(pid).SetSelection( index_avx2 ); else if( index_sse4 >= 0 ) m_ComponentBoxes->Get(pid).SetSelection( index_sse4 ); - else if( index_sse2 >= 0 ) m_ComponentBoxes->Get(pid).SetSelection( index_sse2 ); else m_ComponentBoxes->Get(pid).SetSelection( 0 ); } else diff --git a/pcsx2/x86/iMMI.cpp b/pcsx2/x86/iMMI.cpp index 99a4c98fb30548..4a18e828abedf4 100644 --- a/pcsx2/x86/iMMI.cpp +++ b/pcsx2/x86/iMMI.cpp @@ -215,17 +215,9 @@ void recPMTHL() int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI ); - if ( x86caps.hasStreamingSIMD4Extensions ) { - xBLEND.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S), 0x5); - xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S), 0xdd); - xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0x72); - } - else { - xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S), 0x8d); - xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S), 0xdd); - xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0x72); - xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0x72); - } + xBLEND.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S), 0x5); + 
xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S), 0xdd); + xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0x72); _clearNeededXMMregs(); } @@ -400,47 +392,12 @@ void recPMAXW() EE::Profiler.EmitOp(eeOpcode::PMAXW); int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if ( x86caps.hasStreamingSIMD4Extensions ) { - if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else if( EEREC_D == EEREC_S ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if ( EEREC_D == EEREC_T ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - } - } + if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else if( EEREC_D == EEREC_S ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if ( EEREC_D == EEREC_T ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); else { - int t0reg; - - if( EEREC_S == EEREC_T ) { - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - } - else { - t0reg = _allocTempXMMreg(XMMT_INT, -1); - xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); - xPCMP.GTD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); - - if( EEREC_D == EEREC_S ) { - xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); - } - else if( EEREC_D == EEREC_T ) { - int t1reg = _allocTempXMMreg(XMMT_INT, -1); - xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - xPANDN(xRegisterSSE(t0reg), xRegisterSSE(t1reg)); - _freeXMMreg(t1reg); - } - else { - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); - } - - xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - _freeXMMreg(t0reg); - 
} + xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } _clearNeededXMMregs(); } @@ -1173,18 +1130,7 @@ void recPABSW() //needs clamping xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPSLL.D(xRegisterSSE(t0reg), 31); xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); //0xffffffff if equal to 0x80000000 - if( x86caps.hasSupplementalStreamingSIMD3Extensions ) { - xPABS.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x80000000 -> 0x80000000 - } - else { - int t1reg = _allocTempXMMreg(XMMT_INT, -1); - xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - xPSRA.D(xRegisterSSE(t1reg), 31); - xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); - xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); //0x80000000 -> 0x80000000 - _freeXMMreg(t1reg); - } + xPABS.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x80000000 -> 0x80000000 xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); //0x80000000 -> 0x7fffffff _freeXMMreg(t0reg); _clearNeededXMMregs(); @@ -1203,18 +1149,7 @@ void recPABSH() xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPSLL.W(xRegisterSSE(t0reg), 15); xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); //0xffff if equal to 0x8000 - if( x86caps.hasSupplementalStreamingSIMD3Extensions ) { - xPABS.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x8000 -> 0x8000 - } - else { - int t1reg = _allocTempXMMreg(XMMT_INT, -1); - xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - xPSRA.W(xRegisterSSE(t1reg), 15); - xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); - xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); //0x8000 -> 0x8000 - _freeXMMreg(t1reg); - } + xPABS.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x8000 -> 0x8000 xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); //0x8000 -> 0x7fff _freeXMMreg(t0reg); _clearNeededXMMregs(); @@ -1228,47 
+1163,12 @@ void recPMINW() EE::Profiler.EmitOp(eeOpcode::PMINW); int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if ( x86caps.hasStreamingSIMD4Extensions ) { - if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else if( EEREC_D == EEREC_S ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if ( EEREC_D == EEREC_T ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - } - } + if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else if( EEREC_D == EEREC_S ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if ( EEREC_D == EEREC_T ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); else { - int t0reg; - - if( EEREC_S == EEREC_T ) { - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - } - else { - t0reg = _allocTempXMMreg(XMMT_INT, -1); - xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); - xPCMP.GTD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); - - if( EEREC_D == EEREC_S ) { - xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); - } - else if( EEREC_D == EEREC_T ) { - int t1reg = _allocTempXMMreg(XMMT_INT, -1); - xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - xPANDN(xRegisterSSE(t0reg), xRegisterSSE(t1reg)); - _freeXMMreg(t1reg); - } - else { - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); - } - - xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - _freeXMMreg(t0reg); - } + xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } _clearNeededXMMregs(); } @@ -1718,12 +1618,6 
@@ void recPMADDW() { EE::Profiler.EmitOp(eeOpcode::PMADDW); - if( !x86caps.hasStreamingSIMD4Extensions ) { - _deleteEEreg(_Rd_, 0); - recCall(Interp::PMADDW); - return; - } - int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI ); xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88); xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]} @@ -1775,18 +1669,8 @@ void recPSLLVW() xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } else { - if ( x86caps.hasStreamingSIMD4Extensions ) { - xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); - xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - } - else { - int t0reg = _allocTempXMMreg(XMMT_INT, -1); - xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); - xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D)); - xPSRA.D(xRegisterSSE(t0reg), 31); - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - _freeXMMreg(t0reg); - } + xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); + xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } } else if( _Rt_ == 0 ) { @@ -1813,16 +1697,8 @@ void recPSLLVW() xPSLL.D(xRegisterSSE(t1reg), xRegisterSSE(t0reg)); // merge & sign extend - if ( x86caps.hasStreamingSIMD4Extensions ) { - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); - xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - } - else { - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); - xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D)); - xPSRA.D(xRegisterSSE(t0reg), 31); // get the signs - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - } + xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); + xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); _freeXMMreg(t0reg); _freeXMMreg(t1reg); @@ -1843,18 +1719,8 @@ void recPSRLVW() 
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } else { - if ( x86caps.hasStreamingSIMD4Extensions ) { - xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); - xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - } - else { - int t0reg = _allocTempXMMreg(XMMT_INT, -1); - xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); - xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D)); - xPSRA.D(xRegisterSSE(t0reg), 31); - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - _freeXMMreg(t0reg); - } + xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); + xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } } else if( _Rt_ == 0 ) { @@ -1881,16 +1747,8 @@ void recPSRLVW() xPSRL.D(xRegisterSSE(t1reg), xRegisterSSE(t0reg)); // merge & sign extend - if ( x86caps.hasStreamingSIMD4Extensions ) { - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); - xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - } - else { - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); - xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D)); - xPSRA.D(xRegisterSSE(t0reg), 31); // get the signs - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - } + xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); + xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); _freeXMMreg(t0reg); _freeXMMreg(t1reg); @@ -1903,11 +1761,6 @@ void recPMSUBW() { EE::Profiler.EmitOp(eeOpcode::PMSUBW); - if( !x86caps.hasStreamingSIMD4Extensions ) { - _deleteEEreg(_Rd_, 0); - recCall(Interp::PMSUBW); - return; - } int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI ); xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88); xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]} @@ -1957,11 +1810,6 @@ void recPMULTW() { EE::Profiler.EmitOp(eeOpcode::PMULTW); - 
if( !x86caps.hasStreamingSIMD4Extensions ) { - _deleteEEreg(_Rd_, 0); - recCall(Interp::PMULTW); - return; - } int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI ); if( !_Rs_ || !_Rt_ ) { if( _Rd_ ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); @@ -2455,18 +2303,8 @@ void recPSRAVW() xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } else { - if ( x86caps.hasStreamingSIMD4Extensions ) { - xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); - xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - } - else { - int t0reg = _allocTempXMMreg(XMMT_INT, -1); - xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); - xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D)); - xPSRA.D(xRegisterSSE(t0reg), 31); - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - _freeXMMreg(t0reg); - } + xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); + xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } } else if( _Rt_ == 0 ) { diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 355491f2eef60b..ba26361e04d3f6 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -508,8 +508,8 @@ static void recReserve() { // Hardware Requirements Check... - if ( !x86caps.hasStreamingSIMD2Extensions ) - recThrowHardwareDeficiency( L"SSE2" ); + if ( !x86caps.hasStreamingSIMD4Extensions ) + recThrowHardwareDeficiency( L"SSE4" ); recReserveCache(); } diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index e2d9cee3e8fbff..f5db622152130a 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -47,7 +47,7 @@ void mVUreserveCache(microVU& mVU) { // Only run this once per VU! 
;) void mVUinit(microVU& mVU, uint vuIndex) { - if(!x86caps.hasStreamingSIMD2Extensions) mVUthrowHardwareDeficiency( L"SSE2", vuIndex ); + if(!x86caps.hasStreamingSIMD4Extensions) mVUthrowHardwareDeficiency( L"SSE4", vuIndex ); memzero(mVU.prog); diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index fa65645b6110de..1634c34cf87ed6 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -166,13 +166,7 @@ __fi void getQreg(const xmm& reg, int qInstance) __ri void writeQreg(const xmm& reg, int qInstance) { - if (qInstance) { - if (!x86caps.hasStreamingSIMD4Extensions) { - xPSHUF.D(xmmPQ, xmmPQ, 0xe1); - xMOVSS(xmmPQ, reg); - xPSHUF.D(xmmPQ, xmmPQ, 0xe1); - } - else xINSERTPS(xmmPQ, reg, _MM_MK_INSERTPS_NDX(0, 1, 0)); - } + if (qInstance) + xINSERTPS(xmmPQ, reg, _MM_MK_INSERTPS_NDX(0, 1, 0)); else xMOVSS(xmmPQ, reg); } diff --git a/pcsx2/x86/microVU_Clamp.inl b/pcsx2/x86/microVU_Clamp.inl index e1ebef153ba948..5a145ed373fe0f 100644 --- a/pcsx2/x86/microVU_Clamp.inl +++ b/pcsx2/x86/microVU_Clamp.inl @@ -56,33 +56,10 @@ void mVUclamp1(const xmm& reg, const xmm& regT1, int xyzw, bool bClampE = 0) { // so we just use a temporary mem location for our backup for now... (non-sse4 version only) void mVUclamp2(microVU& mVU, const xmm& reg, const xmm& regT1in, int xyzw, bool bClampE = 0) { if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW)) { - if (x86caps.hasStreamingSIMD4Extensions) { - int i = (xyzw==1||xyzw==2||xyzw==4||xyzw==8) ? 0: 1; - xPMIN.SD(reg, ptr128[&sse4_maxvals[i][0]]); - xPMIN.UD(reg, ptr128[&sse4_minvals[i][0]]); - return; - } - //const xmm& regT1 = regT1b ? mVU.regAlloc->allocReg() : regT1in; - const xmm& regT1 = regT1in.IsEmpty() ? 
xmm((reg.Id + 1) % 8) : regT1in; - if (regT1 != regT1in) xMOVAPS(ptr128[mVU.xmmCTemp], regT1); - switch (xyzw) { - case 1: case 2: case 4: case 8: - xMOVAPS(regT1, reg); - xAND.PS(regT1, ptr128[mVUglob.signbit]); - xMIN.SS(reg, ptr128[mVUglob.maxvals]); - xMAX.SS(reg, ptr128[mVUglob.minvals]); - xOR.PS (reg, regT1); - break; - default: - xMOVAPS(regT1, reg); - xAND.PS(regT1, ptr128[mVUglob.signbit]); - xMIN.PS(reg, ptr128[mVUglob.maxvals]); - xMAX.PS(reg, ptr128[mVUglob.minvals]); - xOR.PS (reg, regT1); - break; - } - //if (regT1 != regT1in) mVU.regAlloc->clearNeeded(regT1); - if (regT1 != regT1in) xMOVAPS(regT1, ptr128[mVU.xmmCTemp]); + int i = (xyzw==1||xyzw==2||xyzw==4||xyzw==8) ? 0: 1; + xPMIN.SD(reg, ptr128[&sse4_maxvals[i][0]]); + xPMIN.UD(reg, ptr128[&sse4_minvals[i][0]]); + return; } else mVUclamp1(reg, regT1in, xyzw, bClampE); } diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 6a761f1b440078..850ddc9a3f0ef1 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -28,11 +28,7 @@ static __fi void testZero(const xmm& xmmReg, const xmm& xmmTemp, const x32& gprT { xXOR.PS(xmmTemp, xmmTemp); xCMPEQ.SS(xmmTemp, xmmReg); - if (!x86caps.hasStreamingSIMD4Extensions) { - xMOVMSKPS(gprTemp, xmmTemp); - xTEST(gprTemp, 1); - } - else xPTEST(xmmTemp, xmmTemp); + xPTEST(xmmTemp, xmmTemp); } // Test if Vector is Negative (Set Flags and Makes Positive) @@ -298,18 +294,8 @@ mVUop(mVU_EEXP) { // sumXYZ(): PQ.x = x ^ 2 + y ^ 2 + z ^ 2 static __fi void mVU_sumXYZ(mV, const xmm& PQ, const xmm& Fs) { - if (x86caps.hasStreamingSIMD4Extensions) { - xDP.PS(Fs, Fs, 0x71); - xMOVSS(PQ, Fs); - } - else { - SSE_MULPS(mVU, Fs, Fs); // wzyx ^ 2 - xMOVSS (PQ, Fs); // x ^ 2 - xPSHUF.D (Fs, Fs, 0xe1); // wzyx -> wzxy - SSE_ADDSS(mVU, PQ, Fs); // x ^ 2 + y ^ 2 - xPSHUF.D (Fs, Fs, 0xd2); // wzxy -> wxyz - SSE_ADDSS(mVU, PQ, Fs); // x ^ 2 + y ^ 2 + z ^ 2 - } + xDP.PS(Fs, Fs, 0x71); + xMOVSS(PQ, Fs); } mVUop(mVU_ELENG) { diff --git 
a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 98a945131ca9bf..47c4a3b5e2fa4b 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -59,72 +59,29 @@ void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW) return;*/ switch ( xyzw ) { - case 5: if (x86caps.hasStreamingSIMD4Extensions) { - xEXTRACTPS(ptr32[ptr+4], reg, 1); - xEXTRACTPS(ptr32[ptr+12], reg, 3); - } - else { - xPSHUF.D(reg, reg, 0xe1); //WZXY - xMOVSS(ptr32[ptr+4], reg); - xPSHUF.D(reg, reg, 0xff); //WWWW - xMOVSS(ptr32[ptr+12], reg); - } + case 5: xEXTRACTPS(ptr32[ptr+4], reg, 1); + xEXTRACTPS(ptr32[ptr+12], reg, 3); break; // YW case 6: xPSHUF.D(reg, reg, 0xc9); xMOVL.PS(ptr64[ptr+4], reg); break; // YZ - case 7: if (x86caps.hasStreamingSIMD4Extensions) { - xMOVH.PS(ptr64[ptr+8], reg); - xEXTRACTPS(ptr32[ptr+4], reg, 1); - } - else { - xPSHUF.D(reg, reg, 0x93); //ZYXW - xMOVH.PS(ptr64[ptr+4], reg); - xMOVSS(ptr32[ptr+12], reg); - } + case 7: xMOVH.PS(ptr64[ptr+8], reg); + xEXTRACTPS(ptr32[ptr+4], reg, 1); break; // YZW - case 9: if (x86caps.hasStreamingSIMD4Extensions) { - xMOVSS(ptr32[ptr], reg); - xEXTRACTPS(ptr32[ptr+12], reg, 3); - } - else { - xMOVSS(ptr32[ptr], reg); - xPSHUF.D(reg, reg, 0xff); //WWWW - xMOVSS(ptr32[ptr+12], reg); - } + case 9: xMOVSS(ptr32[ptr], reg); + xEXTRACTPS(ptr32[ptr+12], reg, 3); break; // XW - case 10: if (x86caps.hasStreamingSIMD4Extensions) { - xMOVSS(ptr32[ptr], reg); - xEXTRACTPS(ptr32[ptr+8], reg, 2); - } - else { - xMOVSS(ptr32[ptr], reg); - xMOVHL.PS(reg, reg); - xMOVSS(ptr32[ptr+8], reg); - } + case 10: xMOVSS(ptr32[ptr], reg); + xEXTRACTPS(ptr32[ptr+8], reg, 2); break; //XZ case 11: xMOVSS(ptr32[ptr], reg); xMOVH.PS(ptr64[ptr+8], reg); break; //XZW - case 13: if (x86caps.hasStreamingSIMD4Extensions) { - xMOVL.PS(ptr64[ptr], reg); - xEXTRACTPS(ptr32[ptr+12], reg, 3); - } - else { - xPSHUF.D(reg, reg, 0x4b); //YXZW - xMOVH.PS(ptr64[ptr], reg); - xMOVSS(ptr32[ptr+12], reg); - } + case 13: 
xMOVL.PS(ptr64[ptr], reg); + xEXTRACTPS(ptr32[ptr+12], reg, 3); break; // XYW - case 14: if (x86caps.hasStreamingSIMD4Extensions) { - xMOVL.PS(ptr64[ptr], reg); - xEXTRACTPS(ptr32[ptr+8], reg, 2); - } - else { - xMOVL.PS(ptr64[ptr], reg); - xMOVHL.PS(reg, reg); - xMOVSS(ptr32[ptr+8], reg); - } + case 14: xMOVL.PS(ptr64[ptr], reg); + xEXTRACTPS(ptr32[ptr+8], reg, 2); break; // XYZ case 4: if (!modXYZW) mVUunpack_xyzw(reg, reg, 1); xMOVSS(ptr32[ptr+4], reg); @@ -146,8 +103,14 @@ void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW) void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW) { xyzw &= 0xf; - if ( (dest != src) && (xyzw != 0) ) { - if (x86caps.hasStreamingSIMD4Extensions && (xyzw != 0x8) && (xyzw != 0xf)) { + if ( (dest != src) && (xyzw != 0) ) + { + if (xyzw == 0x8) + xMOVSS(dest, src); + else if (xyzw == 0xf) + xMOVAPS(dest, src); + else + { if (modXYZW) { if (xyzw == 1) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 3, 0)); return; } else if (xyzw == 2) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 2, 0)); return; } @@ -156,56 +119,6 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW) xyzw = ((xyzw & 1) << 3) | ((xyzw & 2) << 1) | ((xyzw & 4) >> 1) | ((xyzw & 8) >> 3); xBLEND.PS(dest, src, xyzw); } - else { - switch (xyzw) { - case 1: if (modXYZW) mVUunpack_xyzw(src, src, 0); - xMOVHL.PS(src, dest); // src = Sw Sz Dw Dz - xSHUF.PS(dest, src, 0xc4); // 11 00 01 00 - break; - case 2: if (modXYZW) mVUunpack_xyzw(src, src, 0); - xMOVHL.PS(src, dest); - xSHUF.PS(dest, src, 0x64); - break; - case 3: xSHUF.PS(dest, src, 0xe4); - break; - case 4: if (modXYZW) mVUunpack_xyzw(src, src, 0); - xMOVSS(src, dest); - xMOVSD(dest, src); - break; - case 5: xSHUF.PS(dest, src, 0xd8); - xPSHUF.D(dest, dest, 0xd8); - break; - case 6: xSHUF.PS(dest, src, 0x9c); - xPSHUF.D(dest, dest, 0x78); - break; - case 7: xMOVSS(src, dest); - xMOVAPS(dest, src); - break; - case 8: xMOVSS(dest, src); - break; - case 9: 
xSHUF.PS(dest, src, 0xc9); - xPSHUF.D(dest, dest, 0xd2); - break; - case 10: xSHUF.PS(dest, src, 0x8d); - xPSHUF.D(dest, dest, 0x72); - break; - case 11: xMOVSS(dest, src); - xSHUF.PS(dest, src, 0xe4); - break; - case 12: xMOVSD(dest, src); - break; - case 13: xMOVHL.PS(dest, src); - xSHUF.PS(src, dest, 0x64); - xMOVAPS(dest, src); - break; - case 14: xMOVHL.PS(dest, src); - xSHUF.PS(src, dest, 0xc4); - xMOVAPS(dest, src); - break; - default: xMOVAPS(dest, src); - break; - } - } } } diff --git a/pcsx2/x86/newVif_UnpackSSE.cpp b/pcsx2/x86/newVif_UnpackSSE.cpp index fad4a67ce60d4e..d59c52328d8587 100644 --- a/pcsx2/x86/newVif_UnpackSSE.cpp +++ b/pcsx2/x86/newVif_UnpackSSE.cpp @@ -35,23 +35,7 @@ static RecompiledCodeReserve* nVifUpkExec = NULL; // Merges xmm vectors without modifying source reg void mergeVectors(xRegisterSSE dest, xRegisterSSE src, xRegisterSSE temp, int xyzw) { - if (x86caps.hasStreamingSIMD4Extensions || (xyzw==15) - || (xyzw==12) || (xyzw==11) || (xyzw==8) || (xyzw==3)) { - mVUmergeRegs(dest, src, xyzw); - } - else - { - if(temp != src) xMOVAPS(temp, src); //Sometimes we don't care if the source is modified and is temp reg. - if(dest == temp) - { - //VIF can sent the temp directory as the source and destination, just need to clear the ones we dont want in which case. 
- if(!(xyzw & 0x1)) xAND.PS( dest, ptr128[SSEXYZWMask[0]]); - if(!(xyzw & 0x2)) xAND.PS( dest, ptr128[SSEXYZWMask[1]]); - if(!(xyzw & 0x4)) xAND.PS( dest, ptr128[SSEXYZWMask[2]]); - if(!(xyzw & 0x8)) xAND.PS( dest, ptr128[SSEXYZWMask[3]]); - } - else mVUmergeRegs(dest, temp, xyzw); - } + mVUmergeRegs(dest, src, xyzw); } // ===================================================================================================== @@ -113,16 +97,6 @@ void VifUnpackSSE_Base::xUPK_S_32() const { void VifUnpackSSE_Base::xUPK_S_16() const { - if (!x86caps.hasStreamingSIMD4Extensions) - { - xMOV16 (workReg, ptr32[srcIndirect]); - xPUNPCK.LWD(workReg, workReg); - xShiftR (workReg, 16); - - xPSHUF.D (destReg, workReg, _v0); - return; - } - switch(UnpkLoopIteration) { case 0: @@ -144,17 +118,6 @@ void VifUnpackSSE_Base::xUPK_S_16() const { void VifUnpackSSE_Base::xUPK_S_8() const { - if (!x86caps.hasStreamingSIMD4Extensions) - { - xMOV8 (workReg, ptr32[srcIndirect]); - xPUNPCK.LBW(workReg, workReg); - xPUNPCK.LWD(workReg, workReg); - xShiftR (workReg, 24); - - xPSHUF.D (destReg, workReg, _v0); - return; - } - switch(UnpkLoopIteration) { case 0: @@ -200,18 +163,8 @@ void VifUnpackSSE_Base::xUPK_V2_16() const { if(UnpkLoopIteration == 0) { - if (x86caps.hasStreamingSIMD4Extensions) - { - xPMOVXX16 (workReg); - - } - else - { - xMOV64 (workReg, ptr64[srcIndirect]); - xPUNPCK.LWD(workReg, workReg); - xShiftR (workReg, 16); - } - xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0 + xPMOVXX16 (workReg); + xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0 } else { @@ -223,19 +176,9 @@ void VifUnpackSSE_Base::xUPK_V2_16() const { void VifUnpackSSE_Base::xUPK_V2_8() const { - if(UnpkLoopIteration == 0 || !x86caps.hasStreamingSIMD4Extensions) + if(UnpkLoopIteration == 0) { - if (x86caps.hasStreamingSIMD4Extensions) - { - xPMOVXX8 (workReg); - } - else - { - xMOV16 (workReg, ptr32[srcIndirect]); - xPUNPCK.LBW(workReg, workReg); - xPUNPCK.LWD(workReg, workReg); - xShiftR (workReg, 24); - } + xPMOVXX8 
(workReg); xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0 } else @@ -254,16 +197,7 @@ void VifUnpackSSE_Base::xUPK_V3_32() const { void VifUnpackSSE_Base::xUPK_V3_16() const { - if (x86caps.hasStreamingSIMD4Extensions) - { - xPMOVXX16 (destReg); - } - else - { - xMOV64 (destReg, ptr32[srcIndirect]); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 16); - } + xPMOVXX16 (destReg); //With V3-16, it takes the first vector from the next position as the W vector //However - IF the end of this iteration of the unpack falls on a quadword boundary, W becomes 0 @@ -278,17 +212,7 @@ void VifUnpackSSE_Base::xUPK_V3_16() const { void VifUnpackSSE_Base::xUPK_V3_8() const { - if (x86caps.hasStreamingSIMD4Extensions) - { - xPMOVXX8 (destReg); - } - else - { - xMOV32 (destReg, ptr32[srcIndirect]); - xPUNPCK.LBW(destReg, destReg); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 24); - } + xPMOVXX8 (destReg); if (UnpkLoopIteration != IsAligned) xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); } @@ -300,31 +224,12 @@ void VifUnpackSSE_Base::xUPK_V4_32() const { void VifUnpackSSE_Base::xUPK_V4_16() const { - if (x86caps.hasStreamingSIMD4Extensions) - { - xPMOVXX16 (destReg); - } - else - { - xMOV64 (destReg, ptr32[srcIndirect]); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 16); - } + xPMOVXX16 (destReg); } void VifUnpackSSE_Base::xUPK_V4_8() const { - if (x86caps.hasStreamingSIMD4Extensions) - { - xPMOVXX8 (destReg); - } - else - { - xMOV32 (destReg, ptr32[srcIndirect]); - xPUNPCK.LBW(destReg, destReg); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 24); - } + xPMOVXX8 (destReg); } void VifUnpackSSE_Base::xUPK_V4_5() const {