From fdd0f06d6ee9d806a7aa654d10c2d68f9dfd542d Mon Sep 17 00:00:00 2001 From: Gauvain 'GovanifY' Roussel-Tarbouriech Date: Fri, 26 Mar 2021 23:51:15 +0100 Subject: [PATCH] sln: remove SSE4 from build options, make SSE4 default --- PCSX2_suite.sln | 90 -------- buildbot.xml | 6 +- pcsx2/SPU2/spu2.cpp | 14 -- pcsx2/gui/AppInit.cpp | 10 +- pcsx2/gui/Panels/PluginSelectorPanel.cpp | 3 - pcsx2/x86/iMMI.cpp | 212 +++--------------- pcsx2/x86/ix86-32/iR5900-32.cpp | 4 +- pcsx2/x86/microVU.cpp | 2 +- pcsx2/x86/microVU_Alloc.inl | 10 +- pcsx2/x86/microVU_Clamp.inl | 31 +-- pcsx2/x86/microVU_Lower.inl | 20 +- pcsx2/x86/microVU_Misc.inl | 127 ++--------- pcsx2/x86/newVif_UnpackSSE.cpp | 113 +--------- plugins/GSdx/CMakeLists.txt | 4 +- plugins/GSdx/GSBlock.h | 31 +++ plugins/GSdx/GSState.cpp | 4 - plugins/GSdx/GSUtil.cpp | 10 - plugins/GSdx/GSVector4.h | 48 +++- plugins/GSdx/GSVector4i.h | 24 ++ .../GSdx/Renderers/Common/GSVertexTrace.cpp | 61 ----- .../GSdx/Renderers/DX11/GSRendererDX11.cpp | 4 - .../GSdx/Renderers/OpenGL/GSRendererOGL.cpp | 4 - 22 files changed, 172 insertions(+), 660 deletions(-) diff --git a/PCSX2_suite.sln b/PCSX2_suite.sln index 591b0571444c04..862a07eda5f112 100644 --- a/PCSX2_suite.sln +++ b/PCSX2_suite.sln @@ -77,8 +77,6 @@ Global Devel|x64 = Devel|x64 Release AVX2|Win32 = Release AVX2|Win32 Release AVX2|x64 = Release AVX2|x64 - Release SSE4|Win32 = Release SSE4|Win32 - Release SSE4|x64 = Release SSE4|x64 Release|Win32 = Release|Win32 Release|x64 = Release|x64 EndGlobalSection @@ -95,10 +93,6 @@ Global {1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release AVX2|Win32.Build.0 = Release|Win32 {1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release AVX2|x64.ActiveCfg = Release|x64 {1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release AVX2|x64.Build.0 = Release|x64 - {1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release SSE4|Win32.Build.0 = Release|Win32 - 
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release SSE4|x64.ActiveCfg = Release|x64 - {1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release SSE4|x64.Build.0 = Release|x64 {1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release|Win32.ActiveCfg = Release|Win32 {1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release|Win32.Build.0 = Release|Win32 {1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release|x64.ActiveCfg = Release|x64 @@ -115,10 +109,6 @@ Global {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release AVX2|Win32.Build.0 = Release AVX2|Win32 {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release AVX2|x64.ActiveCfg = Release AVX2|x64 {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release AVX2|x64.Build.0 = Release AVX2|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|Win32.ActiveCfg = Release SSE4|Win32 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|Win32.Build.0 = Release SSE4|Win32 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|x64.ActiveCfg = Release SSE4|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|x64.Build.0 = Release SSE4|x64 {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|Win32.ActiveCfg = Release|Win32 {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|Win32.Build.0 = Release|Win32 {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|x64.ActiveCfg = Release|x64 @@ -135,10 +125,6 @@ Global {E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX2|Win32.Build.0 = Release|Win32 {E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX2|x64.ActiveCfg = Release|x64 {E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX2|x64.Build.0 = Release|x64 - {E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSE4|Win32.Build.0 = Release|Win32 - {E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSE4|x64.ActiveCfg = Release|x64 - {E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSE4|x64.Build.0 = Release|x64 {E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release|Win32.ActiveCfg = Release|Win32 {E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release|Win32.Build.0 = 
Release|Win32 {E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release|x64.ActiveCfg = Release|x64 @@ -155,10 +141,6 @@ Global {2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release AVX2|Win32.Build.0 = Release|Win32 {2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release AVX2|x64.ActiveCfg = Release|x64 {2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release AVX2|x64.Build.0 = Release|x64 - {2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release SSE4|Win32.Build.0 = Release|Win32 - {2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release SSE4|x64.ActiveCfg = Release|x64 - {2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release SSE4|x64.Build.0 = Release|x64 {2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release|Win32.ActiveCfg = Release|Win32 {2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release|Win32.Build.0 = Release|Win32 {2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release|x64.ActiveCfg = Release|x64 @@ -175,10 +157,6 @@ Global {A51123F5-9505-4EAE-85E7-D320290A272C}.Release AVX2|Win32.Build.0 = Release|Win32 {A51123F5-9505-4EAE-85E7-D320290A272C}.Release AVX2|x64.ActiveCfg = Release|x64 {A51123F5-9505-4EAE-85E7-D320290A272C}.Release AVX2|x64.Build.0 = Release|x64 - {A51123F5-9505-4EAE-85E7-D320290A272C}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {A51123F5-9505-4EAE-85E7-D320290A272C}.Release SSE4|Win32.Build.0 = Release|Win32 - {A51123F5-9505-4EAE-85E7-D320290A272C}.Release SSE4|x64.ActiveCfg = Release|x64 - {A51123F5-9505-4EAE-85E7-D320290A272C}.Release SSE4|x64.Build.0 = Release|x64 {A51123F5-9505-4EAE-85E7-D320290A272C}.Release|Win32.ActiveCfg = Release|Win32 {A51123F5-9505-4EAE-85E7-D320290A272C}.Release|Win32.Build.0 = Release|Win32 {A51123F5-9505-4EAE-85E7-D320290A272C}.Release|x64.ActiveCfg = Release|x64 @@ -195,10 +173,6 @@ Global {4639972E-424E-4E13-8B07-CA403C481346}.Release AVX2|Win32.Build.0 = Release|Win32 {4639972E-424E-4E13-8B07-CA403C481346}.Release AVX2|x64.ActiveCfg = Release|x64 {4639972E-424E-4E13-8B07-CA403C481346}.Release 
AVX2|x64.Build.0 = Release|x64 - {4639972E-424E-4E13-8B07-CA403C481346}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {4639972E-424E-4E13-8B07-CA403C481346}.Release SSE4|Win32.Build.0 = Release|Win32 - {4639972E-424E-4E13-8B07-CA403C481346}.Release SSE4|x64.ActiveCfg = Release|x64 - {4639972E-424E-4E13-8B07-CA403C481346}.Release SSE4|x64.Build.0 = Release|x64 {4639972E-424E-4E13-8B07-CA403C481346}.Release|Win32.ActiveCfg = Release|Win32 {4639972E-424E-4E13-8B07-CA403C481346}.Release|Win32.Build.0 = Release|Win32 {4639972E-424E-4E13-8B07-CA403C481346}.Release|x64.ActiveCfg = Release|x64 @@ -215,10 +189,6 @@ Global {677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release AVX2|Win32.Build.0 = Release|Win32 {677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release AVX2|x64.ActiveCfg = Release|x64 {677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release AVX2|x64.Build.0 = Release|x64 - {677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release SSE4|Win32.Build.0 = Release|Win32 - {677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release SSE4|x64.ActiveCfg = Release|x64 - {677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release SSE4|x64.Build.0 = Release|x64 {677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release|Win32.ActiveCfg = Release|Win32 {677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release|Win32.Build.0 = Release|Win32 {677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release|x64.ActiveCfg = Release|x64 @@ -235,10 +205,6 @@ Global {BC236261-77E8-4567-8D09-45CD02965EB6}.Release AVX2|Win32.Build.0 = Release|Win32 {BC236261-77E8-4567-8D09-45CD02965EB6}.Release AVX2|x64.ActiveCfg = Release|x64 {BC236261-77E8-4567-8D09-45CD02965EB6}.Release AVX2|x64.Build.0 = Release|x64 - {BC236261-77E8-4567-8D09-45CD02965EB6}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {BC236261-77E8-4567-8D09-45CD02965EB6}.Release SSE4|Win32.Build.0 = Release|Win32 - {BC236261-77E8-4567-8D09-45CD02965EB6}.Release SSE4|x64.ActiveCfg = Release|x64 - {BC236261-77E8-4567-8D09-45CD02965EB6}.Release 
SSE4|x64.Build.0 = Release|x64 {BC236261-77E8-4567-8D09-45CD02965EB6}.Release|Win32.ActiveCfg = Release|Win32 {BC236261-77E8-4567-8D09-45CD02965EB6}.Release|Win32.Build.0 = Release|Win32 {BC236261-77E8-4567-8D09-45CD02965EB6}.Release|x64.ActiveCfg = Release|x64 @@ -255,10 +221,6 @@ Global {0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release AVX2|Win32.Build.0 = Release (NO ASIO)|Win32 {0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release AVX2|x64.ActiveCfg = Release (NO ASIO)|x64 {0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release AVX2|x64.Build.0 = Release (NO ASIO)|x64 - {0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release SSE4|Win32.ActiveCfg = Release (NO ASIO)|Win32 - {0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release SSE4|Win32.Build.0 = Release (NO ASIO)|Win32 - {0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release SSE4|x64.ActiveCfg = Release (NO ASIO)|x64 - {0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release SSE4|x64.Build.0 = Release (NO ASIO)|x64 {0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release|Win32.ActiveCfg = Release (NO ASIO)|Win32 {0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release|Win32.Build.0 = Release (NO ASIO)|Win32 {0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release|x64.ActiveCfg = Release (NO ASIO)|x64 @@ -275,10 +237,6 @@ Global {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release AVX2|Win32.Build.0 = Release|Win32 {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release AVX2|x64.ActiveCfg = Release|x64 {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release AVX2|x64.Build.0 = Release|x64 - {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release SSE4|Win32.Build.0 = Release|Win32 - {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release SSE4|x64.ActiveCfg = Release|x64 - {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release SSE4|x64.Build.0 = Release|x64 {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release|Win32.ActiveCfg = Release|Win32 {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release|Win32.Build.0 = Release|Win32 
{01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release|x64.ActiveCfg = Release|x64 @@ -295,10 +253,6 @@ Global {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release AVX2|Win32.Build.0 = Release|Win32 {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release AVX2|x64.ActiveCfg = Release|x64 {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release AVX2|x64.Build.0 = Release|x64 - {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release SSE4|Win32.Build.0 = Release|Win32 - {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release SSE4|x64.ActiveCfg = Release|x64 - {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release SSE4|x64.Build.0 = Release|x64 {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release|Win32.ActiveCfg = Release|Win32 {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release|Win32.Build.0 = Release|Win32 {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release|x64.ActiveCfg = Release|x64 @@ -315,10 +269,6 @@ Global {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release AVX2|Win32.Build.0 = Release|Win32 {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release AVX2|x64.ActiveCfg = Release|x64 {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release AVX2|x64.Build.0 = Release|x64 - {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release SSE4|Win32.Build.0 = Release|Win32 - {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release SSE4|x64.ActiveCfg = Release|x64 - {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release SSE4|x64.Build.0 = Release|x64 {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release|Win32.ActiveCfg = Release|Win32 {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release|Win32.Build.0 = Release|Win32 {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release|x64.ActiveCfg = Release|x64 @@ -335,10 +285,6 @@ Global {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release AVX2|Win32.Build.0 = Release|Win32 {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release AVX2|x64.ActiveCfg = Release|x64 {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release AVX2|x64.Build.0 = 
Release|x64 - {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release SSE4|Win32.Build.0 = Release|Win32 - {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release SSE4|x64.ActiveCfg = Release|x64 - {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release SSE4|x64.Build.0 = Release|x64 {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release|Win32.ActiveCfg = Release|Win32 {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release|Win32.Build.0 = Release|Win32 {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release|x64.ActiveCfg = Release|x64 @@ -355,10 +301,6 @@ Global {D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release AVX2|Win32.Build.0 = Release|Win32 {D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release AVX2|x64.ActiveCfg = Release|x64 {D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release AVX2|x64.Build.0 = Release|x64 - {D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release SSE4|Win32.Build.0 = Release|Win32 - {D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release SSE4|x64.ActiveCfg = Release|x64 - {D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release SSE4|x64.Build.0 = Release|x64 {D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release|Win32.ActiveCfg = Release|Win32 {D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release|Win32.Build.0 = Release|Win32 {D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release|x64.ActiveCfg = Release|x64 @@ -375,10 +317,6 @@ Global {0FAE817D-9A32-4830-857E-81DA57246E16}.Release AVX2|Win32.Build.0 = Release|Win32 {0FAE817D-9A32-4830-857E-81DA57246E16}.Release AVX2|x64.ActiveCfg = Release|x64 {0FAE817D-9A32-4830-857E-81DA57246E16}.Release AVX2|x64.Build.0 = Release|x64 - {0FAE817D-9A32-4830-857E-81DA57246E16}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {0FAE817D-9A32-4830-857E-81DA57246E16}.Release SSE4|Win32.Build.0 = Release|Win32 - {0FAE817D-9A32-4830-857E-81DA57246E16}.Release SSE4|x64.ActiveCfg = Release|x64 - {0FAE817D-9A32-4830-857E-81DA57246E16}.Release SSE4|x64.Build.0 = 
Release|x64 {0FAE817D-9A32-4830-857E-81DA57246E16}.Release|Win32.ActiveCfg = Release|Win32 {0FAE817D-9A32-4830-857E-81DA57246E16}.Release|Win32.Build.0 = Release|Win32 {0FAE817D-9A32-4830-857E-81DA57246E16}.Release|x64.ActiveCfg = Release|x64 @@ -395,10 +333,6 @@ Global {27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release AVX2|Win32.Build.0 = Release|Win32 {27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release AVX2|x64.ActiveCfg = Release|x64 {27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release AVX2|x64.Build.0 = Release|x64 - {27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release SSE4|Win32.Build.0 = Release|Win32 - {27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release SSE4|x64.ActiveCfg = Release|x64 - {27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release SSE4|x64.Build.0 = Release|x64 {27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release|Win32.ActiveCfg = Release|Win32 {27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release|Win32.Build.0 = Release|Win32 {27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release|x64.ActiveCfg = Release|x64 @@ -415,10 +349,6 @@ Global {78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release AVX2|Win32.Build.0 = Release|Win32 {78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release AVX2|x64.ActiveCfg = Release|x64 {78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release AVX2|x64.Build.0 = Release|x64 - {78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release SSE4|Win32.Build.0 = Release|Win32 - {78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release SSE4|x64.ActiveCfg = Release|x64 - {78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release SSE4|x64.Build.0 = Release|x64 {78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release|Win32.ActiveCfg = Release|Win32 {78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release|Win32.Build.0 = Release|Win32 {78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release|x64.ActiveCfg = Release|x64 @@ -435,10 +365,6 @@ Global {12728250-16EC-4DC6-94D7-E21DD88947F8}.Release AVX2|Win32.Build.0 
= Release|Win32 {12728250-16EC-4DC6-94D7-E21DD88947F8}.Release AVX2|x64.ActiveCfg = Release|x64 {12728250-16EC-4DC6-94D7-E21DD88947F8}.Release AVX2|x64.Build.0 = Release|x64 - {12728250-16EC-4DC6-94D7-E21DD88947F8}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {12728250-16EC-4DC6-94D7-E21DD88947F8}.Release SSE4|Win32.Build.0 = Release|Win32 - {12728250-16EC-4DC6-94D7-E21DD88947F8}.Release SSE4|x64.ActiveCfg = Release|x64 - {12728250-16EC-4DC6-94D7-E21DD88947F8}.Release SSE4|x64.Build.0 = Release|x64 {12728250-16EC-4DC6-94D7-E21DD88947F8}.Release|Win32.ActiveCfg = Release|Win32 {12728250-16EC-4DC6-94D7-E21DD88947F8}.Release|Win32.Build.0 = Release|Win32 {12728250-16EC-4DC6-94D7-E21DD88947F8}.Release|x64.ActiveCfg = Release|x64 @@ -455,10 +381,6 @@ Global {449AD25E-424A-4714-BABC-68706CDCC33B}.Release AVX2|Win32.Build.0 = Release|Win32 {449AD25E-424A-4714-BABC-68706CDCC33B}.Release AVX2|x64.ActiveCfg = Release|x64 {449AD25E-424A-4714-BABC-68706CDCC33B}.Release AVX2|x64.Build.0 = Release|x64 - {449AD25E-424A-4714-BABC-68706CDCC33B}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {449AD25E-424A-4714-BABC-68706CDCC33B}.Release SSE4|Win32.Build.0 = Release|Win32 - {449AD25E-424A-4714-BABC-68706CDCC33B}.Release SSE4|x64.ActiveCfg = Release|x64 - {449AD25E-424A-4714-BABC-68706CDCC33B}.Release SSE4|x64.Build.0 = Release|x64 {449AD25E-424A-4714-BABC-68706CDCC33B}.Release|Win32.ActiveCfg = Release|Win32 {449AD25E-424A-4714-BABC-68706CDCC33B}.Release|Win32.Build.0 = Release|Win32 {449AD25E-424A-4714-BABC-68706CDCC33B}.Release|x64.ActiveCfg = Release|x64 @@ -475,10 +397,6 @@ Global {47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release AVX2|Win32.Build.0 = Release|Win32 {47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release AVX2|x64.ActiveCfg = Release|x64 {47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release AVX2|x64.Build.0 = Release|x64 - {47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release SSE4|Win32.Build.0 = 
Release|Win32 - {47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release SSE4|x64.ActiveCfg = Release|x64 - {47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release SSE4|x64.Build.0 = Release|x64 {47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release|Win32.ActiveCfg = Release|Win32 {47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release|Win32.Build.0 = Release|Win32 {47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release|x64.ActiveCfg = Release|x64 @@ -495,10 +413,6 @@ Global {48329442-E41B-4A1F-8364-36EEE1B71343}.Release AVX2|Win32.Build.0 = Release|Win32 {48329442-E41B-4A1F-8364-36EEE1B71343}.Release AVX2|x64.ActiveCfg = Release|x64 {48329442-E41B-4A1F-8364-36EEE1B71343}.Release AVX2|x64.Build.0 = Release|x64 - {48329442-E41B-4A1F-8364-36EEE1B71343}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {48329442-E41B-4A1F-8364-36EEE1B71343}.Release SSE4|Win32.Build.0 = Release|Win32 - {48329442-E41B-4A1F-8364-36EEE1B71343}.Release SSE4|x64.ActiveCfg = Release|x64 - {48329442-E41B-4A1F-8364-36EEE1B71343}.Release SSE4|x64.Build.0 = Release|x64 {48329442-E41B-4A1F-8364-36EEE1B71343}.Release|Win32.ActiveCfg = Release|Win32 {48329442-E41B-4A1F-8364-36EEE1B71343}.Release|Win32.Build.0 = Release|Win32 {48329442-E41B-4A1F-8364-36EEE1B71343}.Release|x64.ActiveCfg = Release|x64 @@ -515,10 +429,6 @@ Global {A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release AVX2|Win32.Build.0 = Release|Win32 {A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release AVX2|x64.ActiveCfg = Release|x64 {A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release AVX2|x64.Build.0 = Release|x64 - {A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release SSE4|Win32.ActiveCfg = Release|Win32 - {A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release SSE4|Win32.Build.0 = Release|Win32 - {A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release SSE4|x64.ActiveCfg = Release|x64 - {A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release SSE4|x64.Build.0 = Release|x64 {A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release|Win32.ActiveCfg = Release|Win32 {A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release|Win32.Build.0 = Release|Win32 
{A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release|x64.ActiveCfg = Release|x64 diff --git a/buildbot.xml b/buildbot.xml index 9843a45e65f9ed..8cf22b62b2ef10 100644 --- a/buildbot.xml +++ b/buildbot.xml @@ -53,7 +53,7 @@ - + @@ -62,7 +62,7 @@ - + @@ -70,7 +70,7 @@ Release - + diff --git a/pcsx2/SPU2/spu2.cpp b/pcsx2/SPU2/spu2.cpp index 0474baa6687580..1724193a653f29 100644 --- a/pcsx2/SPU2/spu2.cpp +++ b/pcsx2/SPU2/spu2.cpp @@ -50,20 +50,6 @@ u32 lClocks = 0; static bool CheckSSE() { return true; - -#if 0 - if( !cpu_detected ) - { - cpudetectInit(); - cpu_detected = true; - } - if( !x86caps.hasStreamingSIMDExtensions || !x86caps.hasStreamingSIMD2Extensions ) - { - SysMessage( "Your CPU does not support SSE2 instructions.\nThe SPU2 plugin requires SSE2 to run." ); - return false; - } - return true; -#endif } void SPU2configure() diff --git a/pcsx2/gui/AppInit.cpp b/pcsx2/gui/AppInit.cpp index 47f0b3da19075b..88727bea970228 100644 --- a/pcsx2/gui/AppInit.cpp +++ b/pcsx2/gui/AppInit.cpp @@ -46,13 +46,13 @@ void Pcsx2App::DetectCpuAndUserMode() x86caps.CountCores(); x86caps.SIMD_EstablishMXCSRmask(); - if (!x86caps.hasStreamingSIMD2Extensions) + if (!x86caps.hasStreamingSIMD4Extensions) { - // This code will probably never run if the binary was correctly compiled for SSE2 - // SSE2 is required for any decent speed and is supported by more than decade old x86 CPUs + // This code will probably never run if the binary was correctly compiled for SSE4 + // SSE4 is required for any decent speed and is supported by more than decade old x86 CPUs throw Exception::HardwareDeficiency() - .SetDiagMsg(L"Critical Failure: SSE2 Extensions not available.") - .SetUserMsg(_("SSE2 extensions are not available. PCSX2 requires a cpu that supports the SSE2 instruction set.")); + .SetDiagMsg(L"Critical Failure: SSE4 Extensions not available.") + .SetUserMsg(_("SSE4 extensions are not available. 
PCSX2 requires a cpu that supports the SSE4 instruction set.")); } #endif diff --git a/pcsx2/gui/Panels/PluginSelectorPanel.cpp b/pcsx2/gui/Panels/PluginSelectorPanel.cpp index 6ef3d431d4072e..ff8152caf44d6e 100644 --- a/pcsx2/gui/Panels/PluginSelectorPanel.cpp +++ b/pcsx2/gui/Panels/PluginSelectorPanel.cpp @@ -684,7 +684,6 @@ void Panels::PluginSelectorPanel::OnEnumComplete( wxCommandEvent& evt ) int index_avx2 = -1; int index_sse4 = -1; - int index_sse2 = -1; for( int i = 0; i < count; i++ ) { @@ -692,12 +691,10 @@ void Panels::PluginSelectorPanel::OnEnumComplete( wxCommandEvent& evt ) if( x86caps.hasAVX2 && str.Contains("AVX2") ) index_avx2 = i; if( x86caps.hasStreamingSIMD4Extensions && str.Contains("SSE4") ) index_sse4 = i; - if( str.Contains("SSE2") ) index_sse2 = i; } if( index_avx2 >= 0 ) m_ComponentBoxes->Get(pid).SetSelection( index_avx2 ); else if( index_sse4 >= 0 ) m_ComponentBoxes->Get(pid).SetSelection( index_sse4 ); - else if( index_sse2 >= 0 ) m_ComponentBoxes->Get(pid).SetSelection( index_sse2 ); else m_ComponentBoxes->Get(pid).SetSelection( 0 ); } else diff --git a/pcsx2/x86/iMMI.cpp b/pcsx2/x86/iMMI.cpp index 99a4c98fb30548..4a18e828abedf4 100644 --- a/pcsx2/x86/iMMI.cpp +++ b/pcsx2/x86/iMMI.cpp @@ -215,17 +215,9 @@ void recPMTHL() int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI ); - if ( x86caps.hasStreamingSIMD4Extensions ) { - xBLEND.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S), 0x5); - xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S), 0xdd); - xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0x72); - } - else { - xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S), 0x8d); - xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S), 0xdd); - xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0x72); - xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0x72); - } + xBLEND.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S), 0x5); + 
xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S), 0xdd); + xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0x72); _clearNeededXMMregs(); } @@ -400,47 +392,12 @@ void recPMAXW() EE::Profiler.EmitOp(eeOpcode::PMAXW); int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if ( x86caps.hasStreamingSIMD4Extensions ) { - if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else if( EEREC_D == EEREC_S ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if ( EEREC_D == EEREC_T ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - } - } + if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else if( EEREC_D == EEREC_S ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if ( EEREC_D == EEREC_T ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); else { - int t0reg; - - if( EEREC_S == EEREC_T ) { - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - } - else { - t0reg = _allocTempXMMreg(XMMT_INT, -1); - xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); - xPCMP.GTD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); - - if( EEREC_D == EEREC_S ) { - xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); - } - else if( EEREC_D == EEREC_T ) { - int t1reg = _allocTempXMMreg(XMMT_INT, -1); - xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - xPANDN(xRegisterSSE(t0reg), xRegisterSSE(t1reg)); - _freeXMMreg(t1reg); - } - else { - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); - } - - xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - _freeXMMreg(t0reg); - 
} + xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } _clearNeededXMMregs(); } @@ -1173,18 +1130,7 @@ void recPABSW() //needs clamping xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPSLL.D(xRegisterSSE(t0reg), 31); xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); //0xffffffff if equal to 0x80000000 - if( x86caps.hasSupplementalStreamingSIMD3Extensions ) { - xPABS.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x80000000 -> 0x80000000 - } - else { - int t1reg = _allocTempXMMreg(XMMT_INT, -1); - xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - xPSRA.D(xRegisterSSE(t1reg), 31); - xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); - xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); //0x80000000 -> 0x80000000 - _freeXMMreg(t1reg); - } + xPABS.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x80000000 -> 0x80000000 xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); //0x80000000 -> 0x7fffffff _freeXMMreg(t0reg); _clearNeededXMMregs(); @@ -1203,18 +1149,7 @@ void recPABSH() xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPSLL.W(xRegisterSSE(t0reg), 15); xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); //0xffff if equal to 0x8000 - if( x86caps.hasSupplementalStreamingSIMD3Extensions ) { - xPABS.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x8000 -> 0x8000 - } - else { - int t1reg = _allocTempXMMreg(XMMT_INT, -1); - xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - xPSRA.W(xRegisterSSE(t1reg), 15); - xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); - xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); //0x8000 -> 0x8000 - _freeXMMreg(t1reg); - } + xPABS.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x8000 -> 0x8000 xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); //0x8000 -> 0x7fff _freeXMMreg(t0reg); _clearNeededXMMregs(); @@ -1228,47 
+1163,12 @@ void recPMINW() EE::Profiler.EmitOp(eeOpcode::PMINW); int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); - if ( x86caps.hasStreamingSIMD4Extensions ) { - if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else if( EEREC_D == EEREC_S ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if ( EEREC_D == EEREC_T ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - } - } + if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + else if( EEREC_D == EEREC_S ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); + else if ( EEREC_D == EEREC_T ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); else { - int t0reg; - - if( EEREC_S == EEREC_T ) { - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - } - else { - t0reg = _allocTempXMMreg(XMMT_INT, -1); - xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); - xPCMP.GTD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); - - if( EEREC_D == EEREC_S ) { - xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); - } - else if( EEREC_D == EEREC_T ) { - int t1reg = _allocTempXMMreg(XMMT_INT, -1); - xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - xPANDN(xRegisterSSE(t0reg), xRegisterSSE(t1reg)); - _freeXMMreg(t1reg); - } - else { - xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); - } - - xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - _freeXMMreg(t0reg); - } + xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); + xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); } _clearNeededXMMregs(); } @@ -1718,12 +1618,6 
@@ void recPMADDW() { EE::Profiler.EmitOp(eeOpcode::PMADDW); - if( !x86caps.hasStreamingSIMD4Extensions ) { - _deleteEEreg(_Rd_, 0); - recCall(Interp::PMADDW); - return; - } - int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI ); xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88); xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]} @@ -1775,18 +1669,8 @@ void recPSLLVW() xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } else { - if ( x86caps.hasStreamingSIMD4Extensions ) { - xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); - xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - } - else { - int t0reg = _allocTempXMMreg(XMMT_INT, -1); - xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); - xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D)); - xPSRA.D(xRegisterSSE(t0reg), 31); - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - _freeXMMreg(t0reg); - } + xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); + xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } } else if( _Rt_ == 0 ) { @@ -1813,16 +1697,8 @@ void recPSLLVW() xPSLL.D(xRegisterSSE(t1reg), xRegisterSSE(t0reg)); // merge & sign extend - if ( x86caps.hasStreamingSIMD4Extensions ) { - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); - xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - } - else { - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); - xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D)); - xPSRA.D(xRegisterSSE(t0reg), 31); // get the signs - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - } + xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); + xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); _freeXMMreg(t0reg); _freeXMMreg(t1reg); @@ -1843,18 +1719,8 @@ void recPSRLVW() 
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } else { - if ( x86caps.hasStreamingSIMD4Extensions ) { - xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); - xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - } - else { - int t0reg = _allocTempXMMreg(XMMT_INT, -1); - xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); - xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D)); - xPSRA.D(xRegisterSSE(t0reg), 31); - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - _freeXMMreg(t0reg); - } + xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); + xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } } else if( _Rt_ == 0 ) { @@ -1881,16 +1747,8 @@ void recPSRLVW() xPSRL.D(xRegisterSSE(t1reg), xRegisterSSE(t0reg)); // merge & sign extend - if ( x86caps.hasStreamingSIMD4Extensions ) { - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); - xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - } - else { - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); - xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D)); - xPSRA.D(xRegisterSSE(t0reg), 31); // get the signs - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - } + xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); + xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); _freeXMMreg(t0reg); _freeXMMreg(t1reg); @@ -1903,11 +1761,6 @@ void recPMSUBW() { EE::Profiler.EmitOp(eeOpcode::PMSUBW); - if( !x86caps.hasStreamingSIMD4Extensions ) { - _deleteEEreg(_Rd_, 0); - recCall(Interp::PMSUBW); - return; - } int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI ); xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88); xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]} @@ -1957,11 +1810,6 @@ void recPMULTW() { EE::Profiler.EmitOp(eeOpcode::PMULTW); - 
if( !x86caps.hasStreamingSIMD4Extensions ) { - _deleteEEreg(_Rd_, 0); - recCall(Interp::PMULTW); - return; - } int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI ); if( !_Rs_ || !_Rt_ ) { if( _Rd_ ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); @@ -2455,18 +2303,8 @@ void recPSRAVW() xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } else { - if ( x86caps.hasStreamingSIMD4Extensions ) { - xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); - xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - } - else { - int t0reg = _allocTempXMMreg(XMMT_INT, -1); - xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); - xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D)); - xPSRA.D(xRegisterSSE(t0reg), 31); - xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); - _freeXMMreg(t0reg); - } + xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); + xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); } } else if( _Rt_ == 0 ) { diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 355491f2eef60b..ba26361e04d3f6 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -508,8 +508,8 @@ static void recReserve() { // Hardware Requirements Check... - if ( !x86caps.hasStreamingSIMD2Extensions ) - recThrowHardwareDeficiency( L"SSE2" ); + if ( !x86caps.hasStreamingSIMD4Extensions ) + recThrowHardwareDeficiency( L"SSE4" ); recReserveCache(); } diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index e2d9cee3e8fbff..f5db622152130a 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -47,7 +47,7 @@ void mVUreserveCache(microVU& mVU) { // Only run this once per VU! 
;) void mVUinit(microVU& mVU, uint vuIndex) { - if(!x86caps.hasStreamingSIMD2Extensions) mVUthrowHardwareDeficiency( L"SSE2", vuIndex ); + if(!x86caps.hasStreamingSIMD4Extensions) mVUthrowHardwareDeficiency( L"SSE4", vuIndex ); memzero(mVU.prog); diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index fa65645b6110de..1634c34cf87ed6 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -166,13 +166,7 @@ __fi void getQreg(const xmm& reg, int qInstance) __ri void writeQreg(const xmm& reg, int qInstance) { - if (qInstance) { - if (!x86caps.hasStreamingSIMD4Extensions) { - xPSHUF.D(xmmPQ, xmmPQ, 0xe1); - xMOVSS(xmmPQ, reg); - xPSHUF.D(xmmPQ, xmmPQ, 0xe1); - } - else xINSERTPS(xmmPQ, reg, _MM_MK_INSERTPS_NDX(0, 1, 0)); - } + if (qInstance) + xINSERTPS(xmmPQ, reg, _MM_MK_INSERTPS_NDX(0, 1, 0)); else xMOVSS(xmmPQ, reg); } diff --git a/pcsx2/x86/microVU_Clamp.inl b/pcsx2/x86/microVU_Clamp.inl index e1ebef153ba948..5a145ed373fe0f 100644 --- a/pcsx2/x86/microVU_Clamp.inl +++ b/pcsx2/x86/microVU_Clamp.inl @@ -56,33 +56,10 @@ void mVUclamp1(const xmm& reg, const xmm& regT1, int xyzw, bool bClampE = 0) { // so we just use a temporary mem location for our backup for now... (non-sse4 version only) void mVUclamp2(microVU& mVU, const xmm& reg, const xmm& regT1in, int xyzw, bool bClampE = 0) { if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW)) { - if (x86caps.hasStreamingSIMD4Extensions) { - int i = (xyzw==1||xyzw==2||xyzw==4||xyzw==8) ? 0: 1; - xPMIN.SD(reg, ptr128[&sse4_maxvals[i][0]]); - xPMIN.UD(reg, ptr128[&sse4_minvals[i][0]]); - return; - } - //const xmm& regT1 = regT1b ? mVU.regAlloc->allocReg() : regT1in; - const xmm& regT1 = regT1in.IsEmpty() ? 
xmm((reg.Id + 1) % 8) : regT1in; - if (regT1 != regT1in) xMOVAPS(ptr128[mVU.xmmCTemp], regT1); - switch (xyzw) { - case 1: case 2: case 4: case 8: - xMOVAPS(regT1, reg); - xAND.PS(regT1, ptr128[mVUglob.signbit]); - xMIN.SS(reg, ptr128[mVUglob.maxvals]); - xMAX.SS(reg, ptr128[mVUglob.minvals]); - xOR.PS (reg, regT1); - break; - default: - xMOVAPS(regT1, reg); - xAND.PS(regT1, ptr128[mVUglob.signbit]); - xMIN.PS(reg, ptr128[mVUglob.maxvals]); - xMAX.PS(reg, ptr128[mVUglob.minvals]); - xOR.PS (reg, regT1); - break; - } - //if (regT1 != regT1in) mVU.regAlloc->clearNeeded(regT1); - if (regT1 != regT1in) xMOVAPS(regT1, ptr128[mVU.xmmCTemp]); + int i = (xyzw==1||xyzw==2||xyzw==4||xyzw==8) ? 0: 1; + xPMIN.SD(reg, ptr128[&sse4_maxvals[i][0]]); + xPMIN.UD(reg, ptr128[&sse4_minvals[i][0]]); + return; } else mVUclamp1(reg, regT1in, xyzw, bClampE); } diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 6a761f1b440078..850ddc9a3f0ef1 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -28,11 +28,7 @@ static __fi void testZero(const xmm& xmmReg, const xmm& xmmTemp, const x32& gprT { xXOR.PS(xmmTemp, xmmTemp); xCMPEQ.SS(xmmTemp, xmmReg); - if (!x86caps.hasStreamingSIMD4Extensions) { - xMOVMSKPS(gprTemp, xmmTemp); - xTEST(gprTemp, 1); - } - else xPTEST(xmmTemp, xmmTemp); + xPTEST(xmmTemp, xmmTemp); } // Test if Vector is Negative (Set Flags and Makes Positive) @@ -298,18 +294,8 @@ mVUop(mVU_EEXP) { // sumXYZ(): PQ.x = x ^ 2 + y ^ 2 + z ^ 2 static __fi void mVU_sumXYZ(mV, const xmm& PQ, const xmm& Fs) { - if (x86caps.hasStreamingSIMD4Extensions) { - xDP.PS(Fs, Fs, 0x71); - xMOVSS(PQ, Fs); - } - else { - SSE_MULPS(mVU, Fs, Fs); // wzyx ^ 2 - xMOVSS (PQ, Fs); // x ^ 2 - xPSHUF.D (Fs, Fs, 0xe1); // wzyx -> wzxy - SSE_ADDSS(mVU, PQ, Fs); // x ^ 2 + y ^ 2 - xPSHUF.D (Fs, Fs, 0xd2); // wzxy -> wxyz - SSE_ADDSS(mVU, PQ, Fs); // x ^ 2 + y ^ 2 + z ^ 2 - } + xDP.PS(Fs, Fs, 0x71); + xMOVSS(PQ, Fs); } mVUop(mVU_ELENG) { diff --git 
a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 98a945131ca9bf..47c4a3b5e2fa4b 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -59,72 +59,29 @@ void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW) return;*/ switch ( xyzw ) { - case 5: if (x86caps.hasStreamingSIMD4Extensions) { - xEXTRACTPS(ptr32[ptr+4], reg, 1); - xEXTRACTPS(ptr32[ptr+12], reg, 3); - } - else { - xPSHUF.D(reg, reg, 0xe1); //WZXY - xMOVSS(ptr32[ptr+4], reg); - xPSHUF.D(reg, reg, 0xff); //WWWW - xMOVSS(ptr32[ptr+12], reg); - } + case 5: xEXTRACTPS(ptr32[ptr+4], reg, 1); + xEXTRACTPS(ptr32[ptr+12], reg, 3); break; // YW case 6: xPSHUF.D(reg, reg, 0xc9); xMOVL.PS(ptr64[ptr+4], reg); break; // YZ - case 7: if (x86caps.hasStreamingSIMD4Extensions) { - xMOVH.PS(ptr64[ptr+8], reg); - xEXTRACTPS(ptr32[ptr+4], reg, 1); - } - else { - xPSHUF.D(reg, reg, 0x93); //ZYXW - xMOVH.PS(ptr64[ptr+4], reg); - xMOVSS(ptr32[ptr+12], reg); - } + case 7: xMOVH.PS(ptr64[ptr+8], reg); + xEXTRACTPS(ptr32[ptr+4], reg, 1); break; // YZW - case 9: if (x86caps.hasStreamingSIMD4Extensions) { - xMOVSS(ptr32[ptr], reg); - xEXTRACTPS(ptr32[ptr+12], reg, 3); - } - else { - xMOVSS(ptr32[ptr], reg); - xPSHUF.D(reg, reg, 0xff); //WWWW - xMOVSS(ptr32[ptr+12], reg); - } + case 9: xMOVSS(ptr32[ptr], reg); + xEXTRACTPS(ptr32[ptr+12], reg, 3); break; // XW - case 10: if (x86caps.hasStreamingSIMD4Extensions) { - xMOVSS(ptr32[ptr], reg); - xEXTRACTPS(ptr32[ptr+8], reg, 2); - } - else { - xMOVSS(ptr32[ptr], reg); - xMOVHL.PS(reg, reg); - xMOVSS(ptr32[ptr+8], reg); - } + case 10: xMOVSS(ptr32[ptr], reg); + xEXTRACTPS(ptr32[ptr+8], reg, 2); break; //XZ case 11: xMOVSS(ptr32[ptr], reg); xMOVH.PS(ptr64[ptr+8], reg); break; //XZW - case 13: if (x86caps.hasStreamingSIMD4Extensions) { - xMOVL.PS(ptr64[ptr], reg); - xEXTRACTPS(ptr32[ptr+12], reg, 3); - } - else { - xPSHUF.D(reg, reg, 0x4b); //YXZW - xMOVH.PS(ptr64[ptr], reg); - xMOVSS(ptr32[ptr+12], reg); - } + case 13: 
xMOVL.PS(ptr64[ptr], reg); + xEXTRACTPS(ptr32[ptr+12], reg, 3); break; // XYW - case 14: if (x86caps.hasStreamingSIMD4Extensions) { - xMOVL.PS(ptr64[ptr], reg); - xEXTRACTPS(ptr32[ptr+8], reg, 2); - } - else { - xMOVL.PS(ptr64[ptr], reg); - xMOVHL.PS(reg, reg); - xMOVSS(ptr32[ptr+8], reg); - } + case 14: xMOVL.PS(ptr64[ptr], reg); + xEXTRACTPS(ptr32[ptr+8], reg, 2); break; // XYZ case 4: if (!modXYZW) mVUunpack_xyzw(reg, reg, 1); xMOVSS(ptr32[ptr+4], reg); @@ -146,8 +103,14 @@ void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW) void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW) { xyzw &= 0xf; - if ( (dest != src) && (xyzw != 0) ) { - if (x86caps.hasStreamingSIMD4Extensions && (xyzw != 0x8) && (xyzw != 0xf)) { + if ( (dest != src) && (xyzw != 0) ) + { + if (xyzw == 0x8) + xMOVSS(dest, src); + else if (xyzw == 0xf) + xMOVAPS(dest, src); + else + { if (modXYZW) { if (xyzw == 1) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 3, 0)); return; } else if (xyzw == 2) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 2, 0)); return; } @@ -156,56 +119,6 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW) xyzw = ((xyzw & 1) << 3) | ((xyzw & 2) << 1) | ((xyzw & 4) >> 1) | ((xyzw & 8) >> 3); xBLEND.PS(dest, src, xyzw); } - else { - switch (xyzw) { - case 1: if (modXYZW) mVUunpack_xyzw(src, src, 0); - xMOVHL.PS(src, dest); // src = Sw Sz Dw Dz - xSHUF.PS(dest, src, 0xc4); // 11 00 01 00 - break; - case 2: if (modXYZW) mVUunpack_xyzw(src, src, 0); - xMOVHL.PS(src, dest); - xSHUF.PS(dest, src, 0x64); - break; - case 3: xSHUF.PS(dest, src, 0xe4); - break; - case 4: if (modXYZW) mVUunpack_xyzw(src, src, 0); - xMOVSS(src, dest); - xMOVSD(dest, src); - break; - case 5: xSHUF.PS(dest, src, 0xd8); - xPSHUF.D(dest, dest, 0xd8); - break; - case 6: xSHUF.PS(dest, src, 0x9c); - xPSHUF.D(dest, dest, 0x78); - break; - case 7: xMOVSS(src, dest); - xMOVAPS(dest, src); - break; - case 8: xMOVSS(dest, src); - break; - case 9: 
xSHUF.PS(dest, src, 0xc9); - xPSHUF.D(dest, dest, 0xd2); - break; - case 10: xSHUF.PS(dest, src, 0x8d); - xPSHUF.D(dest, dest, 0x72); - break; - case 11: xMOVSS(dest, src); - xSHUF.PS(dest, src, 0xe4); - break; - case 12: xMOVSD(dest, src); - break; - case 13: xMOVHL.PS(dest, src); - xSHUF.PS(src, dest, 0x64); - xMOVAPS(dest, src); - break; - case 14: xMOVHL.PS(dest, src); - xSHUF.PS(src, dest, 0xc4); - xMOVAPS(dest, src); - break; - default: xMOVAPS(dest, src); - break; - } - } } } diff --git a/pcsx2/x86/newVif_UnpackSSE.cpp b/pcsx2/x86/newVif_UnpackSSE.cpp index fad4a67ce60d4e..d59c52328d8587 100644 --- a/pcsx2/x86/newVif_UnpackSSE.cpp +++ b/pcsx2/x86/newVif_UnpackSSE.cpp @@ -35,23 +35,7 @@ static RecompiledCodeReserve* nVifUpkExec = NULL; // Merges xmm vectors without modifying source reg void mergeVectors(xRegisterSSE dest, xRegisterSSE src, xRegisterSSE temp, int xyzw) { - if (x86caps.hasStreamingSIMD4Extensions || (xyzw==15) - || (xyzw==12) || (xyzw==11) || (xyzw==8) || (xyzw==3)) { - mVUmergeRegs(dest, src, xyzw); - } - else - { - if(temp != src) xMOVAPS(temp, src); //Sometimes we don't care if the source is modified and is temp reg. - if(dest == temp) - { - //VIF can sent the temp directory as the source and destination, just need to clear the ones we dont want in which case. 
- if(!(xyzw & 0x1)) xAND.PS( dest, ptr128[SSEXYZWMask[0]]); - if(!(xyzw & 0x2)) xAND.PS( dest, ptr128[SSEXYZWMask[1]]); - if(!(xyzw & 0x4)) xAND.PS( dest, ptr128[SSEXYZWMask[2]]); - if(!(xyzw & 0x8)) xAND.PS( dest, ptr128[SSEXYZWMask[3]]); - } - else mVUmergeRegs(dest, temp, xyzw); - } + mVUmergeRegs(dest, src, xyzw); } // ===================================================================================================== @@ -113,16 +97,6 @@ void VifUnpackSSE_Base::xUPK_S_32() const { void VifUnpackSSE_Base::xUPK_S_16() const { - if (!x86caps.hasStreamingSIMD4Extensions) - { - xMOV16 (workReg, ptr32[srcIndirect]); - xPUNPCK.LWD(workReg, workReg); - xShiftR (workReg, 16); - - xPSHUF.D (destReg, workReg, _v0); - return; - } - switch(UnpkLoopIteration) { case 0: @@ -144,17 +118,6 @@ void VifUnpackSSE_Base::xUPK_S_16() const { void VifUnpackSSE_Base::xUPK_S_8() const { - if (!x86caps.hasStreamingSIMD4Extensions) - { - xMOV8 (workReg, ptr32[srcIndirect]); - xPUNPCK.LBW(workReg, workReg); - xPUNPCK.LWD(workReg, workReg); - xShiftR (workReg, 24); - - xPSHUF.D (destReg, workReg, _v0); - return; - } - switch(UnpkLoopIteration) { case 0: @@ -200,18 +163,8 @@ void VifUnpackSSE_Base::xUPK_V2_16() const { if(UnpkLoopIteration == 0) { - if (x86caps.hasStreamingSIMD4Extensions) - { - xPMOVXX16 (workReg); - - } - else - { - xMOV64 (workReg, ptr64[srcIndirect]); - xPUNPCK.LWD(workReg, workReg); - xShiftR (workReg, 16); - } - xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0 + xPMOVXX16 (workReg); + xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0 } else { @@ -223,19 +176,9 @@ void VifUnpackSSE_Base::xUPK_V2_16() const { void VifUnpackSSE_Base::xUPK_V2_8() const { - if(UnpkLoopIteration == 0 || !x86caps.hasStreamingSIMD4Extensions) + if(UnpkLoopIteration == 0) { - if (x86caps.hasStreamingSIMD4Extensions) - { - xPMOVXX8 (workReg); - } - else - { - xMOV16 (workReg, ptr32[srcIndirect]); - xPUNPCK.LBW(workReg, workReg); - xPUNPCK.LWD(workReg, workReg); - xShiftR (workReg, 24); - } + xPMOVXX8 
(workReg); xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0 } else @@ -254,16 +197,7 @@ void VifUnpackSSE_Base::xUPK_V3_32() const { void VifUnpackSSE_Base::xUPK_V3_16() const { - if (x86caps.hasStreamingSIMD4Extensions) - { - xPMOVXX16 (destReg); - } - else - { - xMOV64 (destReg, ptr32[srcIndirect]); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 16); - } + xPMOVXX16 (destReg); //With V3-16, it takes the first vector from the next position as the W vector //However - IF the end of this iteration of the unpack falls on a quadword boundary, W becomes 0 @@ -278,17 +212,7 @@ void VifUnpackSSE_Base::xUPK_V3_16() const { void VifUnpackSSE_Base::xUPK_V3_8() const { - if (x86caps.hasStreamingSIMD4Extensions) - { - xPMOVXX8 (destReg); - } - else - { - xMOV32 (destReg, ptr32[srcIndirect]); - xPUNPCK.LBW(destReg, destReg); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 24); - } + xPMOVXX8 (destReg); if (UnpkLoopIteration != IsAligned) xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); } @@ -300,31 +224,12 @@ void VifUnpackSSE_Base::xUPK_V4_32() const { void VifUnpackSSE_Base::xUPK_V4_16() const { - if (x86caps.hasStreamingSIMD4Extensions) - { - xPMOVXX16 (destReg); - } - else - { - xMOV64 (destReg, ptr32[srcIndirect]); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 16); - } + xPMOVXX16 (destReg); } void VifUnpackSSE_Base::xUPK_V4_8() const { - if (x86caps.hasStreamingSIMD4Extensions) - { - xPMOVXX8 (destReg); - } - else - { - xMOV32 (destReg, ptr32[srcIndirect]); - xPUNPCK.LBW(destReg, destReg); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 24); - } + xPMOVXX8 (destReg); } void VifUnpackSSE_Base::xUPK_V4_5() const { diff --git a/plugins/GSdx/CMakeLists.txt b/plugins/GSdx/CMakeLists.txt index d370c95cf5c5f4..ac97b581ad8d8a 100644 --- a/plugins/GSdx/CMakeLists.txt +++ b/plugins/GSdx/CMakeLists.txt @@ -270,9 +270,7 @@ if(BUILTIN_GS) else() if (DISABLE_ADVANCE_SIMD) # Don't append -SSE2 on the first build to keep same name as SIMD build - 
add_pcsx2_plugin("${Output}" "${GSdxFinalSources}" "${GSdxFinalLibs}" "${GSdxFinalFlags}") - add_pcsx2_plugin("${Output}-SSE4" "${GSdxFinalSources}" "${GSdxFinalLibs}" "${GSdxFinalFlags} -mssse3 -msse4 -msse4.1") - target_compile_features("${Output}-SSE4" PRIVATE cxx_std_17) + add_pcsx2_plugin("${Output}" "${GSdxFinalSources}" "${GSdxFinalLibs}" "${GSdxFinalFlags} -mssse3 -msse4 -msse4.1") add_pcsx2_plugin("${Output}-AVX2" "${GSdxFinalSources}" "${GSdxFinalLibs}" "${GSdxFinalFlags} -mavx -mavx2 -mbmi -mbmi2") target_compile_features("${Output}-AVX2" PRIVATE cxx_std_17) else() diff --git a/plugins/GSdx/GSBlock.h b/plugins/GSdx/GSBlock.h index bf76b6842fb9fa..99b255bcbbe0c9 100644 --- a/plugins/GSdx/GSBlock.h +++ b/plugins/GSdx/GSBlock.h @@ -519,6 +519,35 @@ class GSBlock template __forceinline static void ReadColumn8(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) { + + //for(int j = 0; j < 64; j++) ((uint8*)src)[j] = (uint8)j; + + #if 0 //_M_SSE >= 0x501 + + const GSVector8i* s = (const GSVector8i*)src; + + GSVector8i v0 = s[i * 2 + 0]; + GSVector8i v1 = s[i * 2 + 1]; + + GSVector8i::sw8(v0, v1); + GSVector8i::sw16(v0, v1); + GSVector8i::sw8(v0, v1); + GSVector8i::sw128(v0, v1); + GSVector8i::sw16(v0, v1); + + v0 = v0.acbd(); + v1 = v1.acbd(); + v1 = v1.yxwz(); + + GSVector8i::storel(&dst[dstpitch * 0], v0); + GSVector8i::storeh(&dst[dstpitch * 1], v0); + GSVector8i::storel(&dst[dstpitch * 2], v1); + GSVector8i::storeh(&dst[dstpitch * 3], v1); + + // TODO: not sure if this is worth it, not in this form, there should be a shorter path + + #else + const GSVector4i* s = (const GSVector4i*)src; GSVector4i v0, v1, v2, v3; @@ -550,6 +579,8 @@ class GSBlock GSVector4i::store(&dst[dstpitch * 1], v3); GSVector4i::store(&dst[dstpitch * 2], v1); GSVector4i::store(&dst[dstpitch * 3], v2); + + #endif } template __forceinline static void ReadColumn4(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) diff --git a/plugins/GSdx/GSState.cpp 
b/plugins/GSdx/GSState.cpp index ccea31e96b00a2..cdaee1f3411266 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -2734,11 +2734,7 @@ __forceinline void GSState::VertexKick(uint32 skip) GSVector4i xy = v1.xxxx().u16to32().sub32(m_ofxy); -#if _M_SSE >= 0x401 GSVector4i::storel(&m_vertex.xy[xy_tail & 3], xy.blend16<0xf0>(xy.sra32(4)).ps32()); -#else - GSVector4i::storel(&m_vertex.xy[xy_tail & 3], xy.upl64(xy.sra32(4).zwzw()).ps32()); -#endif m_vertex.tail = ++tail; m_vertex.xy_tail = ++xy_tail; diff --git a/plugins/GSdx/GSUtil.cpp b/plugins/GSdx/GSUtil.cpp index b5f0f06d2c8074..08f8a352cd4d20 100644 --- a/plugins/GSdx/GSUtil.cpp +++ b/plugins/GSdx/GSUtil.cpp @@ -85,10 +85,6 @@ const char* GSUtil::GetLibName() "AVX", sw_sse #elif _M_SSE >= 0x401 "SSE4.1", sw_sse -#elif _M_SSE >= 0x301 - "SSSE3", sw_sse -#elif _M_SSE >= 0x200 - "SSE2", sw_sse #endif ); @@ -221,13 +217,7 @@ bool GSUtil::CheckSSE() }; ISA checks[] = { - {Xbyak::util::Cpu::tSSE2, "SSE2"}, -#if _M_SSE >= 0x301 - {Xbyak::util::Cpu::tSSSE3, "SSSE3"}, -#endif -#if _M_SSE >= 0x401 {Xbyak::util::Cpu::tSSE41, "SSE41"}, -#endif #if _M_SSE >= 0x500 {Xbyak::util::Cpu::tAVX, "AVX1"}, #endif diff --git a/plugins/GSdx/GSVector4.h b/plugins/GSdx/GSVector4.h index 8ff821c7adf4c3..77422cb351c997 100644 --- a/plugins/GSdx/GSVector4.h +++ b/plugins/GSdx/GSVector4.h @@ -316,22 +316,54 @@ class alignas(16) GSVector4 __forceinline GSVector4 madd(const GSVector4& a, const GSVector4& b) const { + #if 0//_M_SSE >= 0x501 + + return GSVector4(_mm_fmadd_ps(m, a, b)); + + #else + return *this * a + b; + + #endif } __forceinline GSVector4 msub(const GSVector4& a, const GSVector4& b) const { + #if 0//_M_SSE >= 0x501 + + return GSVector4(_mm_fmsub_ps(m, a, b)); + + #else + return *this * a - b; + + #endif } __forceinline GSVector4 nmadd(const GSVector4& a, const GSVector4& b) const { + #if 0//_M_SSE >= 0x501 + + return GSVector4(_mm_fnmadd_ps(m, a, b)); + + #else + return b - *this * a; + + #endif } __forceinline 
GSVector4 nmsub(const GSVector4& a, const GSVector4& b) const { + #if 0//_M_SSE >= 0x501 + + return GSVector4(_mm_fnmsub_ps(m, a, b)); + + #else + return -b - *this * a; + + #endif } __forceinline GSVector4 addm(const GSVector4& a, const GSVector4& b) const @@ -460,16 +492,12 @@ class alignas(16) GSVector4 return _mm_testz_ps(m, m) != 0; - #elif _M_SSE >= 0x401 + #else __m128i a = _mm_castps_si128(m); return _mm_testz_si128(a, a) != 0; - #else - - return mask() == 0; - #endif } @@ -482,6 +510,14 @@ class alignas(16) GSVector4 { // TODO: use blendps when src == dst + #if 0 // _M_SSE >= 0x401 + + // NOTE: it's faster with shuffles... + + return GSVector4(_mm_insert_ps(m, v.m, _MM_MK_INSERTPS_NDX(src, dst, 0))); + + #else + switch(dst) { case 0: @@ -527,6 +563,8 @@ class alignas(16) GSVector4 default: __assume(0); } + + #endif } #ifdef __linux__ diff --git a/plugins/GSdx/GSVector4i.h b/plugins/GSdx/GSVector4i.h index 7a83d92ed0e908..e3c6dc5808d353 100644 --- a/plugins/GSdx/GSVector4i.h +++ b/plugins/GSdx/GSVector4i.h @@ -533,7 +533,15 @@ class alignas(16) GSVector4i __forceinline GSVector4i upl8() const { + #if 0 // _M_SSE >= 0x401 // TODO: compiler bug + + return GSVector4i(_mm_cvtepu8_epi16(m)); + + #else + return GSVector4i(_mm_unpacklo_epi8(m, _mm_setzero_si128())); + + #endif } __forceinline GSVector4i uph8() const @@ -543,7 +551,15 @@ class alignas(16) GSVector4i __forceinline GSVector4i upl16() const { + #if 0 //_M_SSE >= 0x401 // TODO: compiler bug + + return GSVector4i(_mm_cvtepu16_epi32(m)); + + #else + return GSVector4i(_mm_unpacklo_epi16(m, _mm_setzero_si128())); + + #endif } __forceinline GSVector4i uph16() const @@ -553,7 +569,15 @@ class alignas(16) GSVector4i __forceinline GSVector4i upl32() const { + #if 0 //_M_SSE >= 0x401 // TODO: compiler bug + + return GSVector4i(_mm_cvtepu32_epi64(m)); + + #else + return GSVector4i(_mm_unpacklo_epi32(m, _mm_setzero_si128())); + + #endif } __forceinline GSVector4i uph32() const diff --git 
a/plugins/GSdx/Renderers/Common/GSVertexTrace.cpp b/plugins/GSdx/Renderers/Common/GSVertexTrace.cpp index 4df7c0e32b6586..b093811c7e19de 100644 --- a/plugins/GSdx/Renderers/Common/GSVertexTrace.cpp +++ b/plugins/GSdx/Renderers/Common/GSVertexTrace.cpp @@ -175,18 +175,9 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun GSVector4i cmin = GSVector4i::xffffffff(); GSVector4i cmax = GSVector4i::zero(); - #if _M_SSE >= 0x401 - GSVector4i pmin = GSVector4i::xffffffff(); GSVector4i pmax = GSVector4i::zero(); - #else - - GSVector4 pmin = s_minmax.xxxx(); - GSVector4 pmax = s_minmax.yyyy(); - - #endif - const GSVertex* RESTRICT v = (GSVertex*)vertex; for(int i = 0; i < count; i += n) @@ -233,21 +224,10 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun GSVector4i xy = xyzf.upl16(); GSVector4i z = xyzf.yyyy(); - #if _M_SSE >= 0x401 - GSVector4i p = xy.blend16<0xf0>(z.uph32(xyzf)); pmin = pmin.min_u32(p); pmax = pmax.max_u32(p); - - #else - - GSVector4 p = GSVector4(xy.upl64(z.srl32(1).upl32(xyzf.wwww()))); - - pmin = pmin.min(p); - pmax = pmax.max(p); - - #endif } else if(primclass == GS_LINE_CLASS) { @@ -314,23 +294,11 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun GSVector4i xy1 = xyzf1.upl16(); GSVector4i z1 = xyzf1.yyyy(); - #if _M_SSE >= 0x401 - GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0)); GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1)); pmin = pmin.min_u32(p0.min_u32(p1)); pmax = pmax.max_u32(p0.max_u32(p1)); - - #else - - GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww()))); - GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww()))); - - pmin = pmin.min(p0.min(p1)); - pmax = pmax.max(p0.max(p1)); - - #endif } else if(primclass == GS_TRIANGLE_CLASS) { @@ -406,25 +374,12 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun GSVector4i xy2 = xyzf2.upl16(); GSVector4i z2 = xyzf2.yyyy(); - #if _M_SSE >= 
0x401 - GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0)); GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1)); GSVector4i p2 = xy2.blend16<0xf0>(z2.uph32(xyzf2)); pmin = pmin.min_u32(p2).min_u32(p0.min_u32(p1)); pmax = pmax.max_u32(p2).max_u32(p0.max_u32(p1)); - - #else - - GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww()))); - GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww()))); - GSVector4 p2 = GSVector4(xy2.upl64(z2.srl32(1).upl32(xyzf2.wwww()))); - - pmin = pmin.min(p2).min(p0.min(p1)); - pmax = pmax.max(p2).max(p0.max(p1)); - - #endif } else if(primclass == GS_SPRITE_CLASS) { @@ -491,23 +446,11 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun GSVector4i xy1 = xyzf1.upl16(); GSVector4i z1 = xyzf1.yyyy(); - #if _M_SSE >= 0x401 - GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf1)); GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1)); pmin = pmin.min_u32(p0.min_u32(p1)); pmax = pmax.max_u32(p0.max_u32(p1)); - - #else - - GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf1.wwww()))); - GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww()))); - - pmin = pmin.min(p0.min(p1)); - pmax = pmax.max(p0.max(p1)); - - #endif } } @@ -516,13 +459,9 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun // be true if depth isn't constant but close enough. 
It also imply that // pmin.z & 1 == 0 and pax.z & 1 == 0 - #if _M_SSE >= 0x401 - pmin = pmin.blend16<0x30>(pmin.srl32(1)); pmax = pmax.blend16<0x30>(pmax.srl32(1)); - #endif - GSVector4 o(context->XYOFFSET); GSVector4 s(1.0f / 16, 1.0f / 16, 2.0f, 1.0f); diff --git a/plugins/GSdx/Renderers/DX11/GSRendererDX11.cpp b/plugins/GSdx/Renderers/DX11/GSRendererDX11.cpp index 91726a8490ea77..842e4e329f7032 100644 --- a/plugins/GSdx/Renderers/DX11/GSRendererDX11.cpp +++ b/plugins/GSdx/Renderers/DX11/GSRendererDX11.cpp @@ -944,12 +944,8 @@ void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sou m_ps_sel.fog = 1; GSVector4 fc = GSVector4::rgba32(m_env.FOGCOL.u32[0]); -#if _M_SSE >= 0x401 // Blend AREF to avoid to load a random value for alpha (dirty cache) ps_cb.FogColor_AREF = fc.blend32<8>(ps_cb.FogColor_AREF); -#else - ps_cb.FogColor_AREF = fc; -#endif } // Warning must be done after EmulateZbuffer diff --git a/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.cpp b/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.cpp index 6173e291d8f59b..57657f6b1ab484 100644 --- a/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.cpp +++ b/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.cpp @@ -1180,12 +1180,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour m_ps_sel.fog = 1; GSVector4 fc = GSVector4::rgba32(m_env.FOGCOL.u32[0]); -#if _M_SSE >= 0x401 // Blend AREF to avoid to load a random value for alpha (dirty cache) ps_cb.FogColor_AREF = fc.blend32<8>(ps_cb.FogColor_AREF); -#else - ps_cb.FogColor_AREF = fc; -#endif } // Warning must be done after EmulateZbuffer