diff --git a/PCSX2_suite.sln b/PCSX2_suite.sln
index 591b0571444c0..862a07eda5f11 100644
--- a/PCSX2_suite.sln
+++ b/PCSX2_suite.sln
@@ -77,8 +77,6 @@ Global
Devel|x64 = Devel|x64
Release AVX2|Win32 = Release AVX2|Win32
Release AVX2|x64 = Release AVX2|x64
- Release SSE4|Win32 = Release SSE4|Win32
- Release SSE4|x64 = Release SSE4|x64
Release|Win32 = Release|Win32
Release|x64 = Release|x64
EndGlobalSection
@@ -95,10 +93,6 @@ Global
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release AVX2|Win32.Build.0 = Release|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release AVX2|x64.ActiveCfg = Release|x64
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release AVX2|x64.Build.0 = Release|x64
- {1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release SSE4|Win32.Build.0 = Release|Win32
- {1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release SSE4|x64.ActiveCfg = Release|x64
- {1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release SSE4|x64.Build.0 = Release|x64
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release|Win32.ActiveCfg = Release|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release|Win32.Build.0 = Release|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release|x64.ActiveCfg = Release|x64
@@ -115,10 +109,6 @@ Global
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release AVX2|Win32.Build.0 = Release AVX2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release AVX2|x64.ActiveCfg = Release AVX2|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release AVX2|x64.Build.0 = Release AVX2|x64
- {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|Win32.ActiveCfg = Release SSE4|Win32
- {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|Win32.Build.0 = Release SSE4|Win32
- {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|x64.ActiveCfg = Release SSE4|x64
- {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|x64.Build.0 = Release SSE4|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|Win32.ActiveCfg = Release|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|Win32.Build.0 = Release|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|x64.ActiveCfg = Release|x64
@@ -135,10 +125,6 @@ Global
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX2|Win32.Build.0 = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX2|x64.ActiveCfg = Release|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX2|x64.Build.0 = Release|x64
- {E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSE4|Win32.Build.0 = Release|Win32
- {E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSE4|x64.ActiveCfg = Release|x64
- {E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSE4|x64.Build.0 = Release|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release|Win32.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release|Win32.Build.0 = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release|x64.ActiveCfg = Release|x64
@@ -155,10 +141,6 @@ Global
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release AVX2|Win32.Build.0 = Release|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release AVX2|x64.ActiveCfg = Release|x64
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release AVX2|x64.Build.0 = Release|x64
- {2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release SSE4|Win32.Build.0 = Release|Win32
- {2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release SSE4|x64.ActiveCfg = Release|x64
- {2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release SSE4|x64.Build.0 = Release|x64
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release|Win32.ActiveCfg = Release|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release|Win32.Build.0 = Release|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release|x64.ActiveCfg = Release|x64
@@ -175,10 +157,6 @@ Global
{A51123F5-9505-4EAE-85E7-D320290A272C}.Release AVX2|Win32.Build.0 = Release|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Release AVX2|x64.ActiveCfg = Release|x64
{A51123F5-9505-4EAE-85E7-D320290A272C}.Release AVX2|x64.Build.0 = Release|x64
- {A51123F5-9505-4EAE-85E7-D320290A272C}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {A51123F5-9505-4EAE-85E7-D320290A272C}.Release SSE4|Win32.Build.0 = Release|Win32
- {A51123F5-9505-4EAE-85E7-D320290A272C}.Release SSE4|x64.ActiveCfg = Release|x64
- {A51123F5-9505-4EAE-85E7-D320290A272C}.Release SSE4|x64.Build.0 = Release|x64
{A51123F5-9505-4EAE-85E7-D320290A272C}.Release|Win32.ActiveCfg = Release|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Release|Win32.Build.0 = Release|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Release|x64.ActiveCfg = Release|x64
@@ -195,10 +173,6 @@ Global
{4639972E-424E-4E13-8B07-CA403C481346}.Release AVX2|Win32.Build.0 = Release|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Release AVX2|x64.ActiveCfg = Release|x64
{4639972E-424E-4E13-8B07-CA403C481346}.Release AVX2|x64.Build.0 = Release|x64
- {4639972E-424E-4E13-8B07-CA403C481346}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {4639972E-424E-4E13-8B07-CA403C481346}.Release SSE4|Win32.Build.0 = Release|Win32
- {4639972E-424E-4E13-8B07-CA403C481346}.Release SSE4|x64.ActiveCfg = Release|x64
- {4639972E-424E-4E13-8B07-CA403C481346}.Release SSE4|x64.Build.0 = Release|x64
{4639972E-424E-4E13-8B07-CA403C481346}.Release|Win32.ActiveCfg = Release|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Release|Win32.Build.0 = Release|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Release|x64.ActiveCfg = Release|x64
@@ -215,10 +189,6 @@ Global
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release AVX2|Win32.Build.0 = Release|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release AVX2|x64.ActiveCfg = Release|x64
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release AVX2|x64.Build.0 = Release|x64
- {677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release SSE4|Win32.Build.0 = Release|Win32
- {677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release SSE4|x64.ActiveCfg = Release|x64
- {677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release SSE4|x64.Build.0 = Release|x64
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release|Win32.ActiveCfg = Release|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release|Win32.Build.0 = Release|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release|x64.ActiveCfg = Release|x64
@@ -235,10 +205,6 @@ Global
{BC236261-77E8-4567-8D09-45CD02965EB6}.Release AVX2|Win32.Build.0 = Release|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Release AVX2|x64.ActiveCfg = Release|x64
{BC236261-77E8-4567-8D09-45CD02965EB6}.Release AVX2|x64.Build.0 = Release|x64
- {BC236261-77E8-4567-8D09-45CD02965EB6}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {BC236261-77E8-4567-8D09-45CD02965EB6}.Release SSE4|Win32.Build.0 = Release|Win32
- {BC236261-77E8-4567-8D09-45CD02965EB6}.Release SSE4|x64.ActiveCfg = Release|x64
- {BC236261-77E8-4567-8D09-45CD02965EB6}.Release SSE4|x64.Build.0 = Release|x64
{BC236261-77E8-4567-8D09-45CD02965EB6}.Release|Win32.ActiveCfg = Release|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Release|Win32.Build.0 = Release|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Release|x64.ActiveCfg = Release|x64
@@ -255,10 +221,6 @@ Global
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release AVX2|Win32.Build.0 = Release (NO ASIO)|Win32
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release AVX2|x64.ActiveCfg = Release (NO ASIO)|x64
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release AVX2|x64.Build.0 = Release (NO ASIO)|x64
- {0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release SSE4|Win32.ActiveCfg = Release (NO ASIO)|Win32
- {0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release SSE4|Win32.Build.0 = Release (NO ASIO)|Win32
- {0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release SSE4|x64.ActiveCfg = Release (NO ASIO)|x64
- {0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release SSE4|x64.Build.0 = Release (NO ASIO)|x64
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release|Win32.ActiveCfg = Release (NO ASIO)|Win32
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release|Win32.Build.0 = Release (NO ASIO)|Win32
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release|x64.ActiveCfg = Release (NO ASIO)|x64
@@ -275,10 +237,6 @@ Global
{01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release AVX2|Win32.Build.0 = Release|Win32
{01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release AVX2|x64.ActiveCfg = Release|x64
{01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release AVX2|x64.Build.0 = Release|x64
- {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release SSE4|Win32.Build.0 = Release|Win32
- {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release SSE4|x64.ActiveCfg = Release|x64
- {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release SSE4|x64.Build.0 = Release|x64
{01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release|Win32.ActiveCfg = Release|Win32
{01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release|Win32.Build.0 = Release|Win32
{01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release|x64.ActiveCfg = Release|x64
@@ -295,10 +253,6 @@ Global
{3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release AVX2|Win32.Build.0 = Release|Win32
{3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release AVX2|x64.ActiveCfg = Release|x64
{3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release AVX2|x64.Build.0 = Release|x64
- {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release SSE4|Win32.Build.0 = Release|Win32
- {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release SSE4|x64.ActiveCfg = Release|x64
- {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release SSE4|x64.Build.0 = Release|x64
{3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release|Win32.ActiveCfg = Release|Win32
{3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release|Win32.Build.0 = Release|Win32
{3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release|x64.ActiveCfg = Release|x64
@@ -315,10 +269,6 @@ Global
{24C45343-FD20-5C92-81C1-35A2AE841E79}.Release AVX2|Win32.Build.0 = Release|Win32
{24C45343-FD20-5C92-81C1-35A2AE841E79}.Release AVX2|x64.ActiveCfg = Release|x64
{24C45343-FD20-5C92-81C1-35A2AE841E79}.Release AVX2|x64.Build.0 = Release|x64
- {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release SSE4|Win32.Build.0 = Release|Win32
- {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release SSE4|x64.ActiveCfg = Release|x64
- {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release SSE4|x64.Build.0 = Release|x64
{24C45343-FD20-5C92-81C1-35A2AE841E79}.Release|Win32.ActiveCfg = Release|Win32
{24C45343-FD20-5C92-81C1-35A2AE841E79}.Release|Win32.Build.0 = Release|Win32
{24C45343-FD20-5C92-81C1-35A2AE841E79}.Release|x64.ActiveCfg = Release|x64
@@ -335,10 +285,6 @@ Global
{6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release AVX2|Win32.Build.0 = Release|Win32
{6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release AVX2|x64.ActiveCfg = Release|x64
{6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release AVX2|x64.Build.0 = Release|x64
- {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release SSE4|Win32.Build.0 = Release|Win32
- {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release SSE4|x64.ActiveCfg = Release|x64
- {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release SSE4|x64.Build.0 = Release|x64
{6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release|Win32.ActiveCfg = Release|Win32
{6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release|Win32.Build.0 = Release|Win32
{6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release|x64.ActiveCfg = Release|x64
@@ -355,10 +301,6 @@ Global
{D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release AVX2|Win32.Build.0 = Release|Win32
{D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release AVX2|x64.ActiveCfg = Release|x64
{D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release AVX2|x64.Build.0 = Release|x64
- {D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release SSE4|Win32.Build.0 = Release|Win32
- {D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release SSE4|x64.ActiveCfg = Release|x64
- {D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release SSE4|x64.Build.0 = Release|x64
{D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release|Win32.ActiveCfg = Release|Win32
{D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release|Win32.Build.0 = Release|Win32
{D6973076-9317-4EF2-A0B8-B7A18AC0713E}.Release|x64.ActiveCfg = Release|x64
@@ -375,10 +317,6 @@ Global
{0FAE817D-9A32-4830-857E-81DA57246E16}.Release AVX2|Win32.Build.0 = Release|Win32
{0FAE817D-9A32-4830-857E-81DA57246E16}.Release AVX2|x64.ActiveCfg = Release|x64
{0FAE817D-9A32-4830-857E-81DA57246E16}.Release AVX2|x64.Build.0 = Release|x64
- {0FAE817D-9A32-4830-857E-81DA57246E16}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {0FAE817D-9A32-4830-857E-81DA57246E16}.Release SSE4|Win32.Build.0 = Release|Win32
- {0FAE817D-9A32-4830-857E-81DA57246E16}.Release SSE4|x64.ActiveCfg = Release|x64
- {0FAE817D-9A32-4830-857E-81DA57246E16}.Release SSE4|x64.Build.0 = Release|x64
{0FAE817D-9A32-4830-857E-81DA57246E16}.Release|Win32.ActiveCfg = Release|Win32
{0FAE817D-9A32-4830-857E-81DA57246E16}.Release|Win32.Build.0 = Release|Win32
{0FAE817D-9A32-4830-857E-81DA57246E16}.Release|x64.ActiveCfg = Release|x64
@@ -395,10 +333,6 @@ Global
{27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release AVX2|Win32.Build.0 = Release|Win32
{27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release AVX2|x64.ActiveCfg = Release|x64
{27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release AVX2|x64.Build.0 = Release|x64
- {27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release SSE4|Win32.Build.0 = Release|Win32
- {27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release SSE4|x64.ActiveCfg = Release|x64
- {27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release SSE4|x64.Build.0 = Release|x64
{27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release|Win32.ActiveCfg = Release|Win32
{27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release|Win32.Build.0 = Release|Win32
{27F17499-A372-4408-8AFA-4F9F4584FBD3}.Release|x64.ActiveCfg = Release|x64
@@ -415,10 +349,6 @@ Global
{78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release AVX2|Win32.Build.0 = Release|Win32
{78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release AVX2|x64.ActiveCfg = Release|x64
{78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release AVX2|x64.Build.0 = Release|x64
- {78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release SSE4|Win32.Build.0 = Release|Win32
- {78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release SSE4|x64.ActiveCfg = Release|x64
- {78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release SSE4|x64.Build.0 = Release|x64
{78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release|Win32.ActiveCfg = Release|Win32
{78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release|Win32.Build.0 = Release|Win32
{78B079BD-9FC7-4B9E-B4A6-96DA0F00248B}.Release|x64.ActiveCfg = Release|x64
@@ -435,10 +365,6 @@ Global
{12728250-16EC-4DC6-94D7-E21DD88947F8}.Release AVX2|Win32.Build.0 = Release|Win32
{12728250-16EC-4DC6-94D7-E21DD88947F8}.Release AVX2|x64.ActiveCfg = Release|x64
{12728250-16EC-4DC6-94D7-E21DD88947F8}.Release AVX2|x64.Build.0 = Release|x64
- {12728250-16EC-4DC6-94D7-E21DD88947F8}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {12728250-16EC-4DC6-94D7-E21DD88947F8}.Release SSE4|Win32.Build.0 = Release|Win32
- {12728250-16EC-4DC6-94D7-E21DD88947F8}.Release SSE4|x64.ActiveCfg = Release|x64
- {12728250-16EC-4DC6-94D7-E21DD88947F8}.Release SSE4|x64.Build.0 = Release|x64
{12728250-16EC-4DC6-94D7-E21DD88947F8}.Release|Win32.ActiveCfg = Release|Win32
{12728250-16EC-4DC6-94D7-E21DD88947F8}.Release|Win32.Build.0 = Release|Win32
{12728250-16EC-4DC6-94D7-E21DD88947F8}.Release|x64.ActiveCfg = Release|x64
@@ -455,10 +381,6 @@ Global
{449AD25E-424A-4714-BABC-68706CDCC33B}.Release AVX2|Win32.Build.0 = Release|Win32
{449AD25E-424A-4714-BABC-68706CDCC33B}.Release AVX2|x64.ActiveCfg = Release|x64
{449AD25E-424A-4714-BABC-68706CDCC33B}.Release AVX2|x64.Build.0 = Release|x64
- {449AD25E-424A-4714-BABC-68706CDCC33B}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {449AD25E-424A-4714-BABC-68706CDCC33B}.Release SSE4|Win32.Build.0 = Release|Win32
- {449AD25E-424A-4714-BABC-68706CDCC33B}.Release SSE4|x64.ActiveCfg = Release|x64
- {449AD25E-424A-4714-BABC-68706CDCC33B}.Release SSE4|x64.Build.0 = Release|x64
{449AD25E-424A-4714-BABC-68706CDCC33B}.Release|Win32.ActiveCfg = Release|Win32
{449AD25E-424A-4714-BABC-68706CDCC33B}.Release|Win32.Build.0 = Release|Win32
{449AD25E-424A-4714-BABC-68706CDCC33B}.Release|x64.ActiveCfg = Release|x64
@@ -475,10 +397,6 @@ Global
{47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release AVX2|Win32.Build.0 = Release|Win32
{47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release AVX2|x64.ActiveCfg = Release|x64
{47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release AVX2|x64.Build.0 = Release|x64
- {47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release SSE4|Win32.Build.0 = Release|Win32
- {47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release SSE4|x64.ActiveCfg = Release|x64
- {47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release SSE4|x64.Build.0 = Release|x64
{47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release|Win32.ActiveCfg = Release|Win32
{47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release|Win32.Build.0 = Release|Win32
{47AFDBEF-F15F-4BC0-B436-5BE443C3F80F}.Release|x64.ActiveCfg = Release|x64
@@ -495,10 +413,6 @@ Global
{48329442-E41B-4A1F-8364-36EEE1B71343}.Release AVX2|Win32.Build.0 = Release|Win32
{48329442-E41B-4A1F-8364-36EEE1B71343}.Release AVX2|x64.ActiveCfg = Release|x64
{48329442-E41B-4A1F-8364-36EEE1B71343}.Release AVX2|x64.Build.0 = Release|x64
- {48329442-E41B-4A1F-8364-36EEE1B71343}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {48329442-E41B-4A1F-8364-36EEE1B71343}.Release SSE4|Win32.Build.0 = Release|Win32
- {48329442-E41B-4A1F-8364-36EEE1B71343}.Release SSE4|x64.ActiveCfg = Release|x64
- {48329442-E41B-4A1F-8364-36EEE1B71343}.Release SSE4|x64.Build.0 = Release|x64
{48329442-E41B-4A1F-8364-36EEE1B71343}.Release|Win32.ActiveCfg = Release|Win32
{48329442-E41B-4A1F-8364-36EEE1B71343}.Release|Win32.Build.0 = Release|Win32
{48329442-E41B-4A1F-8364-36EEE1B71343}.Release|x64.ActiveCfg = Release|x64
@@ -515,10 +429,6 @@ Global
{A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release AVX2|Win32.Build.0 = Release|Win32
{A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release AVX2|x64.ActiveCfg = Release|x64
{A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release AVX2|x64.Build.0 = Release|x64
- {A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release SSE4|Win32.ActiveCfg = Release|Win32
- {A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release SSE4|Win32.Build.0 = Release|Win32
- {A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release SSE4|x64.ActiveCfg = Release|x64
- {A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release SSE4|x64.Build.0 = Release|x64
{A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release|Win32.ActiveCfg = Release|Win32
{A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release|Win32.Build.0 = Release|Win32
{A0D2B3AD-1F72-4EE3-8B5C-F2C358DA35F0}.Release|x64.ActiveCfg = Release|x64
diff --git a/buildbot.xml b/buildbot.xml
index 9843a45e65f9e..8cf22b62b2ef1 100644
--- a/buildbot.xml
+++ b/buildbot.xml
@@ -53,7 +53,7 @@
-
+
@@ -62,7 +62,7 @@
-
+
@@ -70,7 +70,7 @@
Release
-
+
diff --git a/pcsx2/SPU2/spu2.cpp b/pcsx2/SPU2/spu2.cpp
index 0474baa668758..c924887486114 100644
--- a/pcsx2/SPU2/spu2.cpp
+++ b/pcsx2/SPU2/spu2.cpp
@@ -47,30 +47,8 @@ u32* cyclePtr = nullptr;
u32 lClocks = 0;
//static bool cpu_detected = false;
-static bool CheckSSE()
-{
- return true;
-
-#if 0
- if( !cpu_detected )
- {
- cpudetectInit();
- cpu_detected = true;
- }
- if( !x86caps.hasStreamingSIMDExtensions || !x86caps.hasStreamingSIMD2Extensions )
- {
- SysMessage( "Your CPU does not support SSE2 instructions.\nThe SPU2 plugin requires SSE2 to run." );
- return false;
- }
- return true;
-#endif
-}
-
void SPU2configure()
{
- if (!CheckSSE())
- return;
-
ScopedCoreThreadPause paused_core;
configure();
paused_core.AllowResume();
diff --git a/pcsx2/gui/AppInit.cpp b/pcsx2/gui/AppInit.cpp
index 47f0b3da19075..963b217b88c3d 100644
--- a/pcsx2/gui/AppInit.cpp
+++ b/pcsx2/gui/AppInit.cpp
@@ -46,13 +46,13 @@ void Pcsx2App::DetectCpuAndUserMode()
x86caps.CountCores();
x86caps.SIMD_EstablishMXCSRmask();
- if (!x86caps.hasStreamingSIMD2Extensions)
+ if (!x86caps.hasStreamingSIMD4Extensions)
{
- // This code will probably never run if the binary was correctly compiled for SSE2
- // SSE2 is required for any decent speed and is supported by more than decade old x86 CPUs
+ // This code will probably never run if the binary was correctly compiled for SSE4
+ // SSE4 is required for any decent speed and is supported by more than decade old x86 CPUs
throw Exception::HardwareDeficiency()
- .SetDiagMsg(L"Critical Failure: SSE2 Extensions not available.")
- .SetUserMsg(_("SSE2 extensions are not available. PCSX2 requires a cpu that supports the SSE2 instruction set."));
+ .SetDiagMsg(L"Critical Failure: SSE4.1 Extensions not available.")
+ .SetUserMsg(_("SSE4 extensions are not available. PCSX2 requires a cpu that supports the SSE4.1 instruction set."));
}
#endif
diff --git a/pcsx2/gui/Panels/PluginSelectorPanel.cpp b/pcsx2/gui/Panels/PluginSelectorPanel.cpp
index 6ef3d431d4072..ff8152caf44d6 100644
--- a/pcsx2/gui/Panels/PluginSelectorPanel.cpp
+++ b/pcsx2/gui/Panels/PluginSelectorPanel.cpp
@@ -684,7 +684,6 @@ void Panels::PluginSelectorPanel::OnEnumComplete( wxCommandEvent& evt )
int index_avx2 = -1;
int index_sse4 = -1;
- int index_sse2 = -1;
for( int i = 0; i < count; i++ )
{
@@ -692,12 +691,10 @@ void Panels::PluginSelectorPanel::OnEnumComplete( wxCommandEvent& evt )
if( x86caps.hasAVX2 && str.Contains("AVX2") ) index_avx2 = i;
if( x86caps.hasStreamingSIMD4Extensions && str.Contains("SSE4") ) index_sse4 = i;
- if( str.Contains("SSE2") ) index_sse2 = i;
}
if( index_avx2 >= 0 ) m_ComponentBoxes->Get(pid).SetSelection( index_avx2 );
else if( index_sse4 >= 0 ) m_ComponentBoxes->Get(pid).SetSelection( index_sse4 );
- else if( index_sse2 >= 0 ) m_ComponentBoxes->Get(pid).SetSelection( index_sse2 );
else m_ComponentBoxes->Get(pid).SetSelection( 0 );
}
else
diff --git a/pcsx2/x86/iMMI.cpp b/pcsx2/x86/iMMI.cpp
index 99a4c98fb3054..4a18e828abedf 100644
--- a/pcsx2/x86/iMMI.cpp
+++ b/pcsx2/x86/iMMI.cpp
@@ -215,17 +215,9 @@ void recPMTHL()
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI );
- if ( x86caps.hasStreamingSIMD4Extensions ) {
- xBLEND.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S), 0x5);
- xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S), 0xdd);
- xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0x72);
- }
- else {
- xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S), 0x8d);
- xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S), 0xdd);
- xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0x72);
- xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0x72);
- }
+ xBLEND.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S), 0x5);
+ xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S), 0xdd);
+ xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0x72);
_clearNeededXMMregs();
}
@@ -400,47 +392,12 @@ void recPMAXW()
EE::Profiler.EmitOp(eeOpcode::PMAXW);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
- if ( x86caps.hasStreamingSIMD4Extensions ) {
- if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
- else if( EEREC_D == EEREC_S ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
- else if ( EEREC_D == EEREC_T ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
- else {
- xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
- xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
- }
- }
+ if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
+ else if( EEREC_D == EEREC_S ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
+ else if ( EEREC_D == EEREC_T ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
else {
- int t0reg;
-
- if( EEREC_S == EEREC_T ) {
- xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
- }
- else {
- t0reg = _allocTempXMMreg(XMMT_INT, -1);
- xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
- xPCMP.GTD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
-
- if( EEREC_D == EEREC_S ) {
- xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
- xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
- }
- else if( EEREC_D == EEREC_T ) {
- int t1reg = _allocTempXMMreg(XMMT_INT, -1);
- xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T));
- xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
- xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
- xPANDN(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
- _freeXMMreg(t1reg);
- }
- else {
- xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
- xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
- xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
- }
-
- xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
- _freeXMMreg(t0reg);
- }
+ xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
+ xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
}
_clearNeededXMMregs();
}
@@ -1173,18 +1130,7 @@ void recPABSW() //needs clamping
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSLL.D(xRegisterSSE(t0reg), 31);
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); //0xffffffff if equal to 0x80000000
- if( x86caps.hasSupplementalStreamingSIMD3Extensions ) {
- xPABS.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x80000000 -> 0x80000000
- }
- else {
- int t1reg = _allocTempXMMreg(XMMT_INT, -1);
- xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T));
- xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
- xPSRA.D(xRegisterSSE(t1reg), 31);
- xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
- xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); //0x80000000 -> 0x80000000
- _freeXMMreg(t1reg);
- }
+ xPABS.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x80000000 -> 0x80000000
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); //0x80000000 -> 0x7fffffff
_freeXMMreg(t0reg);
_clearNeededXMMregs();
@@ -1203,18 +1149,7 @@ void recPABSH()
xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSLL.W(xRegisterSSE(t0reg), 15);
xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); //0xffff if equal to 0x8000
- if( x86caps.hasSupplementalStreamingSIMD3Extensions ) {
- xPABS.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x8000 -> 0x8000
- }
- else {
- int t1reg = _allocTempXMMreg(XMMT_INT, -1);
- xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T));
- xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
- xPSRA.W(xRegisterSSE(t1reg), 15);
- xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
- xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); //0x8000 -> 0x8000
- _freeXMMreg(t1reg);
- }
+ xPABS.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x8000 -> 0x8000
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); //0x8000 -> 0x7fff
_freeXMMreg(t0reg);
_clearNeededXMMregs();
@@ -1228,47 +1163,12 @@ void recPMINW()
EE::Profiler.EmitOp(eeOpcode::PMINW);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
- if ( x86caps.hasStreamingSIMD4Extensions ) {
- if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
- else if( EEREC_D == EEREC_S ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
- else if ( EEREC_D == EEREC_T ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
- else {
- xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
- xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
- }
- }
+ if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
+ else if( EEREC_D == EEREC_S ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
+ else if ( EEREC_D == EEREC_T ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
else {
- int t0reg;
-
- if( EEREC_S == EEREC_T ) {
- xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
- }
- else {
- t0reg = _allocTempXMMreg(XMMT_INT, -1);
- xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
- xPCMP.GTD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
-
- if( EEREC_D == EEREC_S ) {
- xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
- xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
- }
- else if( EEREC_D == EEREC_T ) {
- int t1reg = _allocTempXMMreg(XMMT_INT, -1);
- xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T));
- xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
- xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
- xPANDN(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
- _freeXMMreg(t1reg);
- }
- else {
- xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
- xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
- xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
- }
-
- xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
- _freeXMMreg(t0reg);
- }
+ xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
+ xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
}
_clearNeededXMMregs();
}
@@ -1718,12 +1618,6 @@ void recPMADDW()
{
EE::Profiler.EmitOp(eeOpcode::PMADDW);
- if( !x86caps.hasStreamingSIMD4Extensions ) {
- _deleteEEreg(_Rd_, 0);
- recCall(Interp::PMADDW);
- return;
- }
-
int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI );
xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88);
xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]}
@@ -1775,18 +1669,8 @@ void recPSLLVW()
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
else {
- if ( x86caps.hasStreamingSIMD4Extensions ) {
- xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
- xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
- }
- else {
- int t0reg = _allocTempXMMreg(XMMT_INT, -1);
- xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
- xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D));
- xPSRA.D(xRegisterSSE(t0reg), 31);
- xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
- _freeXMMreg(t0reg);
- }
+ xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
+ xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
}
else if( _Rt_ == 0 ) {
@@ -1813,16 +1697,8 @@ void recPSLLVW()
xPSLL.D(xRegisterSSE(t1reg), xRegisterSSE(t0reg));
// merge & sign extend
- if ( x86caps.hasStreamingSIMD4Extensions ) {
- xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
- xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
- }
- else {
- xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
- xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D));
- xPSRA.D(xRegisterSSE(t0reg), 31); // get the signs
- xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
- }
+ xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
+ xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
@@ -1843,18 +1719,8 @@ void recPSRLVW()
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
else {
- if ( x86caps.hasStreamingSIMD4Extensions ) {
- xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
- xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
- }
- else {
- int t0reg = _allocTempXMMreg(XMMT_INT, -1);
- xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
- xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D));
- xPSRA.D(xRegisterSSE(t0reg), 31);
- xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
- _freeXMMreg(t0reg);
- }
+ xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
+ xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
}
else if( _Rt_ == 0 ) {
@@ -1881,16 +1747,8 @@ void recPSRLVW()
xPSRL.D(xRegisterSSE(t1reg), xRegisterSSE(t0reg));
// merge & sign extend
- if ( x86caps.hasStreamingSIMD4Extensions ) {
- xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
- xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
- }
- else {
- xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
- xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D));
- xPSRA.D(xRegisterSSE(t0reg), 31); // get the signs
- xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
- }
+ xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
+ xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
@@ -1903,11 +1761,6 @@ void recPMSUBW()
{
EE::Profiler.EmitOp(eeOpcode::PMSUBW);
- if( !x86caps.hasStreamingSIMD4Extensions ) {
- _deleteEEreg(_Rd_, 0);
- recCall(Interp::PMSUBW);
- return;
- }
int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI );
xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88);
xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]}
@@ -1957,11 +1810,6 @@ void recPMULTW()
{
EE::Profiler.EmitOp(eeOpcode::PMULTW);
- if( !x86caps.hasStreamingSIMD4Extensions ) {
- _deleteEEreg(_Rd_, 0);
- recCall(Interp::PMULTW);
- return;
- }
int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI );
if( !_Rs_ || !_Rt_ ) {
if( _Rd_ ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
@@ -2455,18 +2303,8 @@ void recPSRAVW()
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
else {
- if ( x86caps.hasStreamingSIMD4Extensions ) {
- xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
- xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
- }
- else {
- int t0reg = _allocTempXMMreg(XMMT_INT, -1);
- xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
- xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D));
- xPSRA.D(xRegisterSSE(t0reg), 31);
- xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
- _freeXMMreg(t0reg);
- }
+ xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
+ xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
}
else if( _Rt_ == 0 ) {
diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp
index 355491f2eef60..ba26361e04d3f 100644
--- a/pcsx2/x86/ix86-32/iR5900-32.cpp
+++ b/pcsx2/x86/ix86-32/iR5900-32.cpp
@@ -508,8 +508,8 @@ static void recReserve()
{
// Hardware Requirements Check...
- if ( !x86caps.hasStreamingSIMD2Extensions )
- recThrowHardwareDeficiency( L"SSE2" );
+ if ( !x86caps.hasStreamingSIMD4Extensions )
+ recThrowHardwareDeficiency( L"SSE4" );
recReserveCache();
}
diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp
index e2d9cee3e8fbf..f5db622152130 100644
--- a/pcsx2/x86/microVU.cpp
+++ b/pcsx2/x86/microVU.cpp
@@ -47,7 +47,7 @@ void mVUreserveCache(microVU& mVU) {
// Only run this once per VU! ;)
void mVUinit(microVU& mVU, uint vuIndex) {
- if(!x86caps.hasStreamingSIMD2Extensions) mVUthrowHardwareDeficiency( L"SSE2", vuIndex );
+ if(!x86caps.hasStreamingSIMD4Extensions) mVUthrowHardwareDeficiency( L"SSE4", vuIndex );
memzero(mVU.prog);
diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl
index fa65645b6110d..1634c34cf87ed 100644
--- a/pcsx2/x86/microVU_Alloc.inl
+++ b/pcsx2/x86/microVU_Alloc.inl
@@ -166,13 +166,7 @@ __fi void getQreg(const xmm& reg, int qInstance)
__ri void writeQreg(const xmm& reg, int qInstance)
{
- if (qInstance) {
- if (!x86caps.hasStreamingSIMD4Extensions) {
- xPSHUF.D(xmmPQ, xmmPQ, 0xe1);
- xMOVSS(xmmPQ, reg);
- xPSHUF.D(xmmPQ, xmmPQ, 0xe1);
- }
- else xINSERTPS(xmmPQ, reg, _MM_MK_INSERTPS_NDX(0, 1, 0));
- }
+ if (qInstance)
+ xINSERTPS(xmmPQ, reg, _MM_MK_INSERTPS_NDX(0, 1, 0));
else xMOVSS(xmmPQ, reg);
}
diff --git a/pcsx2/x86/microVU_Clamp.inl b/pcsx2/x86/microVU_Clamp.inl
index e1ebef153ba94..5a145ed373fe0 100644
--- a/pcsx2/x86/microVU_Clamp.inl
+++ b/pcsx2/x86/microVU_Clamp.inl
@@ -56,33 +56,10 @@ void mVUclamp1(const xmm& reg, const xmm& regT1, int xyzw, bool bClampE = 0) {
// so we just use a temporary mem location for our backup for now... (non-sse4 version only)
void mVUclamp2(microVU& mVU, const xmm& reg, const xmm& regT1in, int xyzw, bool bClampE = 0) {
if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW)) {
- if (x86caps.hasStreamingSIMD4Extensions) {
- int i = (xyzw==1||xyzw==2||xyzw==4||xyzw==8) ? 0: 1;
- xPMIN.SD(reg, ptr128[&sse4_maxvals[i][0]]);
- xPMIN.UD(reg, ptr128[&sse4_minvals[i][0]]);
- return;
- }
- //const xmm& regT1 = regT1b ? mVU.regAlloc->allocReg() : regT1in;
- const xmm& regT1 = regT1in.IsEmpty() ? xmm((reg.Id + 1) % 8) : regT1in;
- if (regT1 != regT1in) xMOVAPS(ptr128[mVU.xmmCTemp], regT1);
- switch (xyzw) {
- case 1: case 2: case 4: case 8:
- xMOVAPS(regT1, reg);
- xAND.PS(regT1, ptr128[mVUglob.signbit]);
- xMIN.SS(reg, ptr128[mVUglob.maxvals]);
- xMAX.SS(reg, ptr128[mVUglob.minvals]);
- xOR.PS (reg, regT1);
- break;
- default:
- xMOVAPS(regT1, reg);
- xAND.PS(regT1, ptr128[mVUglob.signbit]);
- xMIN.PS(reg, ptr128[mVUglob.maxvals]);
- xMAX.PS(reg, ptr128[mVUglob.minvals]);
- xOR.PS (reg, regT1);
- break;
- }
- //if (regT1 != regT1in) mVU.regAlloc->clearNeeded(regT1);
- if (regT1 != regT1in) xMOVAPS(regT1, ptr128[mVU.xmmCTemp]);
+ int i = (xyzw==1||xyzw==2||xyzw==4||xyzw==8) ? 0: 1;
+ xPMIN.SD(reg, ptr128[&sse4_maxvals[i][0]]);
+ xPMIN.UD(reg, ptr128[&sse4_minvals[i][0]]);
+ return;
}
else mVUclamp1(reg, regT1in, xyzw, bClampE);
}
diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl
index 6a761f1b44007..850ddc9a3f0ef 100644
--- a/pcsx2/x86/microVU_Lower.inl
+++ b/pcsx2/x86/microVU_Lower.inl
@@ -28,11 +28,7 @@ static __fi void testZero(const xmm& xmmReg, const xmm& xmmTemp, const x32& gprT
{
xXOR.PS(xmmTemp, xmmTemp);
xCMPEQ.SS(xmmTemp, xmmReg);
- if (!x86caps.hasStreamingSIMD4Extensions) {
- xMOVMSKPS(gprTemp, xmmTemp);
- xTEST(gprTemp, 1);
- }
- else xPTEST(xmmTemp, xmmTemp);
+ xPTEST(xmmTemp, xmmTemp);
}
// Test if Vector is Negative (Set Flags and Makes Positive)
@@ -298,18 +294,8 @@ mVUop(mVU_EEXP) {
// sumXYZ(): PQ.x = x ^ 2 + y ^ 2 + z ^ 2
static __fi void mVU_sumXYZ(mV, const xmm& PQ, const xmm& Fs) {
- if (x86caps.hasStreamingSIMD4Extensions) {
- xDP.PS(Fs, Fs, 0x71);
- xMOVSS(PQ, Fs);
- }
- else {
- SSE_MULPS(mVU, Fs, Fs); // wzyx ^ 2
- xMOVSS (PQ, Fs); // x ^ 2
- xPSHUF.D (Fs, Fs, 0xe1); // wzyx -> wzxy
- SSE_ADDSS(mVU, PQ, Fs); // x ^ 2 + y ^ 2
- xPSHUF.D (Fs, Fs, 0xd2); // wzxy -> wxyz
- SSE_ADDSS(mVU, PQ, Fs); // x ^ 2 + y ^ 2 + z ^ 2
- }
+ xDP.PS(Fs, Fs, 0x71);
+ xMOVSS(PQ, Fs);
}
mVUop(mVU_ELENG) {
diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl
index 98a945131ca9b..47c4a3b5e2fa4 100644
--- a/pcsx2/x86/microVU_Misc.inl
+++ b/pcsx2/x86/microVU_Misc.inl
@@ -59,72 +59,29 @@ void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW)
return;*/
switch ( xyzw ) {
- case 5: if (x86caps.hasStreamingSIMD4Extensions) {
- xEXTRACTPS(ptr32[ptr+4], reg, 1);
- xEXTRACTPS(ptr32[ptr+12], reg, 3);
- }
- else {
- xPSHUF.D(reg, reg, 0xe1); //WZXY
- xMOVSS(ptr32[ptr+4], reg);
- xPSHUF.D(reg, reg, 0xff); //WWWW
- xMOVSS(ptr32[ptr+12], reg);
- }
+ case 5: xEXTRACTPS(ptr32[ptr+4], reg, 1);
+ xEXTRACTPS(ptr32[ptr+12], reg, 3);
break; // YW
case 6: xPSHUF.D(reg, reg, 0xc9);
xMOVL.PS(ptr64[ptr+4], reg);
break; // YZ
- case 7: if (x86caps.hasStreamingSIMD4Extensions) {
- xMOVH.PS(ptr64[ptr+8], reg);
- xEXTRACTPS(ptr32[ptr+4], reg, 1);
- }
- else {
- xPSHUF.D(reg, reg, 0x93); //ZYXW
- xMOVH.PS(ptr64[ptr+4], reg);
- xMOVSS(ptr32[ptr+12], reg);
- }
+ case 7: xMOVH.PS(ptr64[ptr+8], reg);
+ xEXTRACTPS(ptr32[ptr+4], reg, 1);
break; // YZW
- case 9: if (x86caps.hasStreamingSIMD4Extensions) {
- xMOVSS(ptr32[ptr], reg);
- xEXTRACTPS(ptr32[ptr+12], reg, 3);
- }
- else {
- xMOVSS(ptr32[ptr], reg);
- xPSHUF.D(reg, reg, 0xff); //WWWW
- xMOVSS(ptr32[ptr+12], reg);
- }
+ case 9: xMOVSS(ptr32[ptr], reg);
+ xEXTRACTPS(ptr32[ptr+12], reg, 3);
break; // XW
- case 10: if (x86caps.hasStreamingSIMD4Extensions) {
- xMOVSS(ptr32[ptr], reg);
- xEXTRACTPS(ptr32[ptr+8], reg, 2);
- }
- else {
- xMOVSS(ptr32[ptr], reg);
- xMOVHL.PS(reg, reg);
- xMOVSS(ptr32[ptr+8], reg);
- }
+ case 10: xMOVSS(ptr32[ptr], reg);
+ xEXTRACTPS(ptr32[ptr+8], reg, 2);
break; //XZ
case 11: xMOVSS(ptr32[ptr], reg);
xMOVH.PS(ptr64[ptr+8], reg);
break; //XZW
- case 13: if (x86caps.hasStreamingSIMD4Extensions) {
- xMOVL.PS(ptr64[ptr], reg);
- xEXTRACTPS(ptr32[ptr+12], reg, 3);
- }
- else {
- xPSHUF.D(reg, reg, 0x4b); //YXZW
- xMOVH.PS(ptr64[ptr], reg);
- xMOVSS(ptr32[ptr+12], reg);
- }
+ case 13: xMOVL.PS(ptr64[ptr], reg);
+ xEXTRACTPS(ptr32[ptr+12], reg, 3);
break; // XYW
- case 14: if (x86caps.hasStreamingSIMD4Extensions) {
- xMOVL.PS(ptr64[ptr], reg);
- xEXTRACTPS(ptr32[ptr+8], reg, 2);
- }
- else {
- xMOVL.PS(ptr64[ptr], reg);
- xMOVHL.PS(reg, reg);
- xMOVSS(ptr32[ptr+8], reg);
- }
+ case 14: xMOVL.PS(ptr64[ptr], reg);
+ xEXTRACTPS(ptr32[ptr+8], reg, 2);
break; // XYZ
case 4: if (!modXYZW) mVUunpack_xyzw(reg, reg, 1);
xMOVSS(ptr32[ptr+4], reg);
@@ -146,8 +103,14 @@ void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW)
void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
{
xyzw &= 0xf;
- if ( (dest != src) && (xyzw != 0) ) {
- if (x86caps.hasStreamingSIMD4Extensions && (xyzw != 0x8) && (xyzw != 0xf)) {
+ if ( (dest != src) && (xyzw != 0) )
+ {
+ if (xyzw == 0x8)
+ xMOVSS(dest, src);
+ else if (xyzw == 0xf)
+ xMOVAPS(dest, src);
+ else
+ {
if (modXYZW) {
if (xyzw == 1) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 3, 0)); return; }
else if (xyzw == 2) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 2, 0)); return; }
@@ -156,56 +119,6 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
xyzw = ((xyzw & 1) << 3) | ((xyzw & 2) << 1) | ((xyzw & 4) >> 1) | ((xyzw & 8) >> 3);
xBLEND.PS(dest, src, xyzw);
}
- else {
- switch (xyzw) {
- case 1: if (modXYZW) mVUunpack_xyzw(src, src, 0);
- xMOVHL.PS(src, dest); // src = Sw Sz Dw Dz
- xSHUF.PS(dest, src, 0xc4); // 11 00 01 00
- break;
- case 2: if (modXYZW) mVUunpack_xyzw(src, src, 0);
- xMOVHL.PS(src, dest);
- xSHUF.PS(dest, src, 0x64);
- break;
- case 3: xSHUF.PS(dest, src, 0xe4);
- break;
- case 4: if (modXYZW) mVUunpack_xyzw(src, src, 0);
- xMOVSS(src, dest);
- xMOVSD(dest, src);
- break;
- case 5: xSHUF.PS(dest, src, 0xd8);
- xPSHUF.D(dest, dest, 0xd8);
- break;
- case 6: xSHUF.PS(dest, src, 0x9c);
- xPSHUF.D(dest, dest, 0x78);
- break;
- case 7: xMOVSS(src, dest);
- xMOVAPS(dest, src);
- break;
- case 8: xMOVSS(dest, src);
- break;
- case 9: xSHUF.PS(dest, src, 0xc9);
- xPSHUF.D(dest, dest, 0xd2);
- break;
- case 10: xSHUF.PS(dest, src, 0x8d);
- xPSHUF.D(dest, dest, 0x72);
- break;
- case 11: xMOVSS(dest, src);
- xSHUF.PS(dest, src, 0xe4);
- break;
- case 12: xMOVSD(dest, src);
- break;
- case 13: xMOVHL.PS(dest, src);
- xSHUF.PS(src, dest, 0x64);
- xMOVAPS(dest, src);
- break;
- case 14: xMOVHL.PS(dest, src);
- xSHUF.PS(src, dest, 0xc4);
- xMOVAPS(dest, src);
- break;
- default: xMOVAPS(dest, src);
- break;
- }
- }
}
}
diff --git a/pcsx2/x86/newVif_UnpackSSE.cpp b/pcsx2/x86/newVif_UnpackSSE.cpp
index fad4a67ce60d4..d59c52328d858 100644
--- a/pcsx2/x86/newVif_UnpackSSE.cpp
+++ b/pcsx2/x86/newVif_UnpackSSE.cpp
@@ -35,23 +35,7 @@ static RecompiledCodeReserve* nVifUpkExec = NULL;
// Merges xmm vectors without modifying source reg
void mergeVectors(xRegisterSSE dest, xRegisterSSE src, xRegisterSSE temp, int xyzw) {
- if (x86caps.hasStreamingSIMD4Extensions || (xyzw==15)
- || (xyzw==12) || (xyzw==11) || (xyzw==8) || (xyzw==3)) {
- mVUmergeRegs(dest, src, xyzw);
- }
- else
- {
- if(temp != src) xMOVAPS(temp, src); //Sometimes we don't care if the source is modified and is temp reg.
- if(dest == temp)
- {
- //VIF can sent the temp directory as the source and destination, just need to clear the ones we dont want in which case.
- if(!(xyzw & 0x1)) xAND.PS( dest, ptr128[SSEXYZWMask[0]]);
- if(!(xyzw & 0x2)) xAND.PS( dest, ptr128[SSEXYZWMask[1]]);
- if(!(xyzw & 0x4)) xAND.PS( dest, ptr128[SSEXYZWMask[2]]);
- if(!(xyzw & 0x8)) xAND.PS( dest, ptr128[SSEXYZWMask[3]]);
- }
- else mVUmergeRegs(dest, temp, xyzw);
- }
+ mVUmergeRegs(dest, src, xyzw);
}
// =====================================================================================================
@@ -113,16 +97,6 @@ void VifUnpackSSE_Base::xUPK_S_32() const {
void VifUnpackSSE_Base::xUPK_S_16() const {
- if (!x86caps.hasStreamingSIMD4Extensions)
- {
- xMOV16 (workReg, ptr32[srcIndirect]);
- xPUNPCK.LWD(workReg, workReg);
- xShiftR (workReg, 16);
-
- xPSHUF.D (destReg, workReg, _v0);
- return;
- }
-
switch(UnpkLoopIteration)
{
case 0:
@@ -144,17 +118,6 @@ void VifUnpackSSE_Base::xUPK_S_16() const {
void VifUnpackSSE_Base::xUPK_S_8() const {
- if (!x86caps.hasStreamingSIMD4Extensions)
- {
- xMOV8 (workReg, ptr32[srcIndirect]);
- xPUNPCK.LBW(workReg, workReg);
- xPUNPCK.LWD(workReg, workReg);
- xShiftR (workReg, 24);
-
- xPSHUF.D (destReg, workReg, _v0);
- return;
- }
-
switch(UnpkLoopIteration)
{
case 0:
@@ -200,18 +163,8 @@ void VifUnpackSSE_Base::xUPK_V2_16() const {
if(UnpkLoopIteration == 0)
{
- if (x86caps.hasStreamingSIMD4Extensions)
- {
- xPMOVXX16 (workReg);
-
- }
- else
- {
- xMOV64 (workReg, ptr64[srcIndirect]);
- xPUNPCK.LWD(workReg, workReg);
- xShiftR (workReg, 16);
- }
- xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0
+ xPMOVXX16 (workReg);
+ xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0
}
else
{
@@ -223,19 +176,9 @@ void VifUnpackSSE_Base::xUPK_V2_16() const {
void VifUnpackSSE_Base::xUPK_V2_8() const {
- if(UnpkLoopIteration == 0 || !x86caps.hasStreamingSIMD4Extensions)
+ if(UnpkLoopIteration == 0)
{
- if (x86caps.hasStreamingSIMD4Extensions)
- {
- xPMOVXX8 (workReg);
- }
- else
- {
- xMOV16 (workReg, ptr32[srcIndirect]);
- xPUNPCK.LBW(workReg, workReg);
- xPUNPCK.LWD(workReg, workReg);
- xShiftR (workReg, 24);
- }
+ xPMOVXX8 (workReg);
xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0
}
else
@@ -254,16 +197,7 @@ void VifUnpackSSE_Base::xUPK_V3_32() const {
void VifUnpackSSE_Base::xUPK_V3_16() const {
- if (x86caps.hasStreamingSIMD4Extensions)
- {
- xPMOVXX16 (destReg);
- }
- else
- {
- xMOV64 (destReg, ptr32[srcIndirect]);
- xPUNPCK.LWD(destReg, destReg);
- xShiftR (destReg, 16);
- }
+ xPMOVXX16 (destReg);
//With V3-16, it takes the first vector from the next position as the W vector
//However - IF the end of this iteration of the unpack falls on a quadword boundary, W becomes 0
@@ -278,17 +212,7 @@ void VifUnpackSSE_Base::xUPK_V3_16() const {
void VifUnpackSSE_Base::xUPK_V3_8() const {
- if (x86caps.hasStreamingSIMD4Extensions)
- {
- xPMOVXX8 (destReg);
- }
- else
- {
- xMOV32 (destReg, ptr32[srcIndirect]);
- xPUNPCK.LBW(destReg, destReg);
- xPUNPCK.LWD(destReg, destReg);
- xShiftR (destReg, 24);
- }
+ xPMOVXX8 (destReg);
if (UnpkLoopIteration != IsAligned)
xAND.PS(destReg, ptr128[SSEXYZWMask[0]]);
}
@@ -300,31 +224,12 @@ void VifUnpackSSE_Base::xUPK_V4_32() const {
void VifUnpackSSE_Base::xUPK_V4_16() const {
- if (x86caps.hasStreamingSIMD4Extensions)
- {
- xPMOVXX16 (destReg);
- }
- else
- {
- xMOV64 (destReg, ptr32[srcIndirect]);
- xPUNPCK.LWD(destReg, destReg);
- xShiftR (destReg, 16);
- }
+ xPMOVXX16 (destReg);
}
void VifUnpackSSE_Base::xUPK_V4_8() const {
- if (x86caps.hasStreamingSIMD4Extensions)
- {
- xPMOVXX8 (destReg);
- }
- else
- {
- xMOV32 (destReg, ptr32[srcIndirect]);
- xPUNPCK.LBW(destReg, destReg);
- xPUNPCK.LWD(destReg, destReg);
- xShiftR (destReg, 24);
- }
+ xPMOVXX8 (destReg);
}
void VifUnpackSSE_Base::xUPK_V4_5() const {
diff --git a/plugins/GSdx/CMakeLists.txt b/plugins/GSdx/CMakeLists.txt
index d370c95cf5c5f..ac97b581ad8d8 100644
--- a/plugins/GSdx/CMakeLists.txt
+++ b/plugins/GSdx/CMakeLists.txt
@@ -270,9 +270,7 @@ if(BUILTIN_GS)
else()
if (DISABLE_ADVANCE_SIMD)
# Don't append -SSE2 on the first build to keep same name as SIMD build
- add_pcsx2_plugin("${Output}" "${GSdxFinalSources}" "${GSdxFinalLibs}" "${GSdxFinalFlags}")
- add_pcsx2_plugin("${Output}-SSE4" "${GSdxFinalSources}" "${GSdxFinalLibs}" "${GSdxFinalFlags} -mssse3 -msse4 -msse4.1")
- target_compile_features("${Output}-SSE4" PRIVATE cxx_std_17)
+ add_pcsx2_plugin("${Output}" "${GSdxFinalSources}" "${GSdxFinalLibs}" "${GSdxFinalFlags} -msse3 -msse4 -msse4.1")
add_pcsx2_plugin("${Output}-AVX2" "${GSdxFinalSources}" "${GSdxFinalLibs}" "${GSdxFinalFlags} -mavx -mavx2 -mbmi -mbmi2")
target_compile_features("${Output}-AVX2" PRIVATE cxx_std_17)
else()
diff --git a/plugins/GSdx/GSBlock.h b/plugins/GSdx/GSBlock.h
index 1c38fcb52966e..99b255bcbbe0c 100644
--- a/plugins/GSdx/GSBlock.h
+++ b/plugins/GSdx/GSBlock.h
@@ -158,8 +158,6 @@ class GSBlock
{
GSVector4i v4((int)mask);
- #if _M_SSE >= 0x401
-
if(mask == 0xff000000 || mask == 0x00ffffff)
{
((GSVector4i*)dst)[i * 4 + 0] = ((GSVector4i*)dst)[i * 4 + 0].blend8(v0, v4);
@@ -169,19 +167,11 @@ class GSBlock
}
else
{
-
- #endif
-
- ((GSVector4i*)dst)[i * 4 + 0] = ((GSVector4i*)dst)[i * 4 + 0].blend(v0, v4);
- ((GSVector4i*)dst)[i * 4 + 1] = ((GSVector4i*)dst)[i * 4 + 1].blend(v1, v4);
- ((GSVector4i*)dst)[i * 4 + 2] = ((GSVector4i*)dst)[i * 4 + 2].blend(v2, v4);
- ((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend(v3, v4);
-
- #if _M_SSE >= 0x401
-
+ ((GSVector4i*)dst)[i * 4 + 0] = ((GSVector4i*)dst)[i * 4 + 0].blend(v0, v4);
+ ((GSVector4i*)dst)[i * 4 + 1] = ((GSVector4i*)dst)[i * 4 + 1].blend(v1, v4);
+ ((GSVector4i*)dst)[i * 4 + 2] = ((GSVector4i*)dst)[i * 4 + 2].blend(v2, v4);
+ ((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend(v3, v4);
}
-
- #endif
}
#endif
@@ -524,40 +514,18 @@ class GSBlock
GSVector4i::store(&d1[0], v1);
GSVector4i::store(&d1[1], v3);
- #else
-
- const GSVector4i* s = (const GSVector4i*)src;
-
- GSVector4i v0 = s[i * 4 + 0];
- GSVector4i v1 = s[i * 4 + 1];
- GSVector4i v2 = s[i * 4 + 2];
- GSVector4i v3 = s[i * 4 + 3];
-
- //for(int16 i = 0; i < 8; i++) {v0.i16[i] = i; v1.i16[i] = i + 8; v2.i16[i] = i + 16; v3.i16[i] = i + 24;}
-
- GSVector4i::sw16(v0, v1, v2, v3);
- GSVector4i::sw32(v0, v1, v2, v3);
- GSVector4i::sw16(v0, v2, v1, v3);
-
- GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0];
- GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1];
-
- GSVector4i::store(&d0[0], v0);
- GSVector4i::store(&d0[1], v1);
- GSVector4i::store(&d1[0], v2);
- GSVector4i::store(&d1[1], v3);
-
#endif
}
template<int i> __forceinline static void ReadColumn8(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{
+
//for(int j = 0; j < 64; j++) ((uint8*)src)[j] = (uint8)j;
- #if 0//_M_SSE >= 0x501
+ #if 0 //_M_SSE >= 0x501
const GSVector8i* s = (const GSVector8i*)src;
-
+
GSVector8i v0 = s[i * 2 + 0];
GSVector8i v1 = s[i * 2 + 1];
@@ -578,7 +546,7 @@ class GSBlock
// TODO: not sure if this is worth it, not in this form, there should be a shorter path
- #elif _M_SSE >= 0x301
+ #else
const GSVector4i* s = (const GSVector4i*)src;
@@ -612,36 +580,6 @@ class GSBlock
GSVector4i::store(&dst[dstpitch * 2], v1);
GSVector4i::store(&dst[dstpitch * 3], v2);
- #else
-
- const GSVector4i* s = (const GSVector4i*)src;
-
- GSVector4i v0 = s[i * 4 + 0];
- GSVector4i v1 = s[i * 4 + 1];
- GSVector4i v2 = s[i * 4 + 2];
- GSVector4i v3 = s[i * 4 + 3];
-
- GSVector4i::sw8(v0, v1, v2, v3);
- GSVector4i::sw16(v0, v1, v2, v3);
- GSVector4i::sw8(v0, v2, v1, v3);
- GSVector4i::sw64(v0, v1, v2, v3);
-
- if((i & 1) == 0)
- {
- v2 = v2.yxwz();
- v3 = v3.yxwz();
- }
- else
- {
- v0 = v0.yxwz();
- v1 = v1.yxwz();
- }
-
- GSVector4i::store(&dst[dstpitch * 0], v0);
- GSVector4i::store(&dst[dstpitch * 1], v1);
- GSVector4i::store(&dst[dstpitch * 2], v2);
- GSVector4i::store(&dst[dstpitch * 3], v3);
-
#endif
}
@@ -649,8 +587,6 @@ class GSBlock
{
//printf("ReadColumn4\n");
- #if _M_SSE >= 0x301
-
const GSVector4i* s = (const GSVector4i*)src;
GSVector4i v0 = s[i * 4 + 0].xzyw();
@@ -680,46 +616,6 @@ class GSBlock
GSVector4i::store(&dst[dstpitch * 1], v1);
GSVector4i::store(&dst[dstpitch * 2], v2);
GSVector4i::store(&dst[dstpitch * 3], v3);
-
- #else
-
- const GSVector4i* s = (const GSVector4i*)src;
-
- GSVector4i v0 = s[i * 4 + 0];
- GSVector4i v1 = s[i * 4 + 1];
- GSVector4i v2 = s[i * 4 + 2];
- GSVector4i v3 = s[i * 4 + 3];
-
- GSVector4i::sw32(v0, v1, v2, v3);
- GSVector4i::sw32(v0, v1, v2, v3);
- GSVector4i::sw4(v0, v2, v1, v3);
- GSVector4i::sw8(v0, v1, v2, v3);
- GSVector4i::sw16(v0, v2, v1, v3);
-
- v0 = v0.xzyw();
- v1 = v1.xzyw();
- v2 = v2.xzyw();
- v3 = v3.xzyw();
-
- GSVector4i::sw64(v0, v1, v2, v3);
-
- if((i & 1) == 0)
- {
- v2 = v2.yxwzlh();
- v3 = v3.yxwzlh();
- }
- else
- {
- v0 = v0.yxwzlh();
- v1 = v1.yxwzlh();
- }
-
- GSVector4i::store(&dst[dstpitch * 0], v0);
- GSVector4i::store(&dst[dstpitch * 1], v1);
- GSVector4i::store(&dst[dstpitch * 2], v2);
- GSVector4i::store(&dst[dstpitch * 3], v3);
-
- #endif
}
static void ReadColumn32(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
@@ -1238,7 +1134,6 @@ class GSBlock
{
for(int j = 0; j < 8; j++, dst += dstpitch)
{
- #if _M_SSE >= 0x401
const GSVector4i* s = (const GSVector4i*)src;
@@ -1246,15 +1141,6 @@ class GSBlock
GSVector4i v1 = (s[j * 2 + 1] >> 24).gather32_32<>(pal);
((GSVector4i*)dst)[0] = v0.pu32(v1);
-
- #else
-
- for(int i = 0; i < 8; i++)
- {
- ((uint16*)dst)[i] = (uint16)pal[src[j * 8 + i] >> 24];
- }
-
- #endif
}
}
@@ -1273,23 +1159,12 @@ class GSBlock
{
for(int j = 0; j < 8; j++, dst += dstpitch)
{
- #if _M_SSE >= 0x401
-
const GSVector4i* s = (const GSVector4i*)src;
GSVector4i v0 = ((s[j * 2 + 0] >> 24) & 0xf).gather32_32<>(pal);
GSVector4i v1 = ((s[j * 2 + 1] >> 24) & 0xf).gather32_32<>(pal);
((GSVector4i*)dst)[0] = v0.pu32(v1);
-
- #else
-
- for(int i = 0; i < 8; i++)
- {
- ((uint16*)dst)[i] = (uint16)pal[(src[j * 8 + i] >> 24) & 0xf];
- }
-
- #endif
}
}
@@ -1308,23 +1183,12 @@ class GSBlock
{
for(int j = 0; j < 8; j++, dst += dstpitch)
{
- #if _M_SSE >= 0x401
-
const GSVector4i* s = (const GSVector4i*)src;
GSVector4i v0 = (s[j * 2 + 0] >> 28).gather32_32<>(pal);
GSVector4i v1 = (s[j * 2 + 1] >> 28).gather32_32<>(pal);
((GSVector4i*)dst)[0] = v0.pu32(v1);
-
- #else
-
- for(int i = 0; i < 8; i++)
- {
- ((uint16*)dst)[i] = (uint16)pal[src[j * 8 + i] >> 28];
- }
-
- #endif
}
}
@@ -1486,32 +1350,6 @@ class GSBlock
((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend8(v3, mask);
}
- #else
-
- GSVector4i v0, v1, v2, v3;
- GSVector4i mask = GSVector4i::xff000000();
-
- for(int i = 0; i < 4; i++, src += srcpitch * 2)
- {
- v4 = GSVector4i::loadl(&src[srcpitch * 0]);
- v5 = GSVector4i::loadl(&src[srcpitch * 1]);
-
- v6 = v4.upl16(v5);
-
- v4 = v6.upl8(v6);
- v5 = v6.uph8(v6);
-
- v0 = v4.upl16(v4);
- v1 = v4.uph16(v4);
- v2 = v5.upl16(v5);
- v3 = v5.uph16(v5);
-
- ((GSVector4i*)dst)[i * 4 + 0] = ((GSVector4i*)dst)[i * 4 + 0].blend8(v0, mask);
- ((GSVector4i*)dst)[i * 4 + 1] = ((GSVector4i*)dst)[i * 4 + 1].blend8(v1, mask);
- ((GSVector4i*)dst)[i * 4 + 2] = ((GSVector4i*)dst)[i * 4 + 2].blend8(v2, mask);
- ((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend8(v3, mask);
- }
-
#endif
}
@@ -1608,47 +1446,6 @@ class GSBlock
((GSVector4i*)dst)[i * 8 + 7] = ((GSVector4i*)dst)[i * 8 + 7].blend(v3, mask);
}
- #else
-
- GSVector4i v0, v1, v2, v3;
- GSVector4i mask = GSVector4i(0x0f000000);
-
- for(int i = 0; i < 2; i++, src += srcpitch * 4)
- {
- GSVector4i v(*(uint32*)&src[srcpitch * 0], *(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 3]);
-
- v4 = v.upl8(v >> 4);
- v5 = v.uph8(v >> 4);
-
- v6 = v4.upl16(v5);
- v7 = v4.uph16(v5);
-
- v4 = v6.upl8(v6);
- v5 = v6.uph8(v6);
- v6 = v7.upl8(v7);
- v7 = v7.uph8(v7);
-
- v0 = v4.upl16(v4);
- v1 = v4.uph16(v4);
- v2 = v5.upl16(v5);
- v3 = v5.uph16(v5);
-
- ((GSVector4i*)dst)[i * 8 + 0] = ((GSVector4i*)dst)[i * 8 + 0].blend(v0, mask);
- ((GSVector4i*)dst)[i * 8 + 1] = ((GSVector4i*)dst)[i * 8 + 1].blend(v1, mask);
- ((GSVector4i*)dst)[i * 8 + 2] = ((GSVector4i*)dst)[i * 8 + 2].blend(v2, mask);
- ((GSVector4i*)dst)[i * 8 + 3] = ((GSVector4i*)dst)[i * 8 + 3].blend(v3, mask);
-
- v0 = v6.upl16(v6);
- v1 = v6.uph16(v6);
- v2 = v7.upl16(v7);
- v3 = v7.uph16(v7);
-
- ((GSVector4i*)dst)[i * 8 + 4] = ((GSVector4i*)dst)[i * 8 + 4].blend(v0, mask);
- ((GSVector4i*)dst)[i * 8 + 5] = ((GSVector4i*)dst)[i * 8 + 5].blend(v1, mask);
- ((GSVector4i*)dst)[i * 8 + 6] = ((GSVector4i*)dst)[i * 8 + 6].blend(v2, mask);
- ((GSVector4i*)dst)[i * 8 + 7] = ((GSVector4i*)dst)[i * 8 + 7].blend(v3, mask);
- }
-
#endif
}
@@ -1736,47 +1533,6 @@ class GSBlock
((GSVector4i*)dst)[i * 8 + 7] = ((GSVector4i*)dst)[i * 8 + 7].blend(v3, mask);
}
- #else
-
- GSVector4i v0, v1, v2, v3;
- GSVector4i mask = GSVector4i::xf0000000();
-
- for(int i = 0; i < 2; i++, src += srcpitch * 4)
- {
- GSVector4i v(*(uint32*)&src[srcpitch * 0], *(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 3]);
-
- v4 = (v << 4).upl8(v);
- v5 = (v << 4).uph8(v);
-
- v6 = v4.upl16(v5);
- v7 = v4.uph16(v5);
-
- v4 = v6.upl8(v6);
- v5 = v6.uph8(v6);
- v6 = v7.upl8(v7);
- v7 = v7.uph8(v7);
-
- v0 = v4.upl16(v4);
- v1 = v4.uph16(v4);
- v2 = v5.upl16(v5);
- v3 = v5.uph16(v5);
-
- ((GSVector4i*)dst)[i * 8 + 0] = ((GSVector4i*)dst)[i * 8 + 0].blend(v0, mask);
- ((GSVector4i*)dst)[i * 8 + 1] = ((GSVector4i*)dst)[i * 8 + 1].blend(v1, mask);
- ((GSVector4i*)dst)[i * 8 + 2] = ((GSVector4i*)dst)[i * 8 + 2].blend(v2, mask);
- ((GSVector4i*)dst)[i * 8 + 3] = ((GSVector4i*)dst)[i * 8 + 3].blend(v3, mask);
-
- v0 = v6.upl16(v6);
- v1 = v6.uph16(v6);
- v2 = v7.upl16(v7);
- v3 = v7.uph16(v7);
-
- ((GSVector4i*)dst)[i * 8 + 4] = ((GSVector4i*)dst)[i * 8 + 4].blend(v0, mask);
- ((GSVector4i*)dst)[i * 8 + 5] = ((GSVector4i*)dst)[i * 8 + 5].blend(v1, mask);
- ((GSVector4i*)dst)[i * 8 + 6] = ((GSVector4i*)dst)[i * 8 + 6].blend(v2, mask);
- ((GSVector4i*)dst)[i * 8 + 7] = ((GSVector4i*)dst)[i * 8 + 7].blend(v3, mask);
- }
-
#endif
}
@@ -1882,39 +1638,6 @@ class GSBlock
d1[1] = Expand16to32(v1.uph16(v1), TA0, TA1);
}
- #elif 0 // not faster
-
- const GSVector4i* s = (const GSVector4i*)src;
-
- GSVector4i TA0(TEXA.TA0 << 24);
- GSVector4i TA1(TEXA.TA1 << 24);
-
- for(int i = 0; i < 4; i++, dst += dstpitch * 2)
- {
- GSVector4i v0 = s[i * 4 + 0];
- GSVector4i v1 = s[i * 4 + 1];
- GSVector4i v2 = s[i * 4 + 2];
- GSVector4i v3 = s[i * 4 + 3];
-
- GSVector4i::sw16(v0, v1, v2, v3);
- GSVector4i::sw32(v0, v1, v2, v3);
- GSVector4i::sw16(v0, v2, v1, v3);
-
- GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0];
-
- d0[0] = Expand16to32(v0.upl16(v0), TA0, TA1);
- d0[1] = Expand16to32(v0.uph16(v0), TA0, TA1);
- d0[2] = Expand16to32(v1.upl16(v1), TA0, TA1);
- d0[3] = Expand16to32(v1.uph16(v1), TA0, TA1);
-
- GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1];
-
- d1[0] = Expand16to32(v2.upl16(v2), TA0, TA1);
- d1[1] = Expand16to32(v2.uph16(v2), TA0, TA1);
- d1[2] = Expand16to32(v3.upl16(v3), TA0, TA1);
- d1[3] = Expand16to32(v3.uph16(v3), TA0, TA1);
- }
-
#else
alignas(32) uint16 block[16 * 8];
@@ -1930,8 +1653,6 @@ class GSBlock
{
//printf("ReadAndExpandBlock8_32\n");
- #if _M_SSE >= 0x401
-
const GSVector4i* s = (const GSVector4i*)src;
GSVector4i v0, v1, v2, v3;
@@ -1973,16 +1694,6 @@ class GSBlock
v2.gather32_8<>(pal, (GSVector4i*)dst);
dst += dstpitch;
}
-
- #else
-
- alignas(32) uint8 block[16 * 16];
-
- ReadBlock8(src, (uint8*)block, sizeof(block) / 16);
-
- ExpandBlock8_32(block, dst, dstpitch, pal);
-
- #endif
}
// TODO: ReadAndExpandBlock8_16
@@ -1991,8 +1702,6 @@ class GSBlock
{
//printf("ReadAndExpandBlock4_32\n");
- #if _M_SSE >= 0x401
-
const GSVector4i* s = (const GSVector4i*)src;
GSVector4i v0, v1, v2, v3;
@@ -2050,16 +1759,6 @@ class GSBlock
v3.gather64_8<>(pal, (GSVector4i*)dst);
dst += dstpitch;
}
-
- #else
-
- alignas(32) uint8 block[(32 / 2) * 16];
-
- ReadBlock4(src, (uint8*)block, sizeof(block) / 16);
-
- ExpandBlock4_32(block, dst, dstpitch, pal);
-
- #endif
}
// TODO: ReadAndExpandBlock4_16
@@ -2068,8 +1767,6 @@ class GSBlock
{
//printf("ReadAndExpandBlock8H_32\n");
- #if _M_SSE >= 0x401
-
const GSVector4i* s = (const GSVector4i*)src;
GSVector4i v0, v1, v2, v3;
@@ -2093,16 +1790,6 @@ class GSBlock
dst += dstpitch;
}
-
- #else
-
- alignas(32) uint32 block[8 * 8];
-
- ReadBlock32(src, (uint8*)block, sizeof(block) / 8);
-
- ExpandBlock8H_32(block, dst, dstpitch, pal);
-
- #endif
}
// TODO: ReadAndExpandBlock8H_16
@@ -2110,9 +1797,6 @@ class GSBlock
__forceinline static void ReadAndExpandBlock4HL_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal)
{
//printf("ReadAndExpandBlock4HL_32\n");
-
- #if _M_SSE >= 0x401
-
const GSVector4i* s = (const GSVector4i*)src;
GSVector4i v0, v1, v2, v3;
@@ -2136,16 +1820,6 @@ class GSBlock
dst += dstpitch;
}
-
- #else
-
- alignas(32) uint32 block[8 * 8];
-
- ReadBlock32(src, (uint8*)block, sizeof(block) / 8);
-
- ExpandBlock4HL_32(block, dst, dstpitch, pal);
-
- #endif
}
// TODO: ReadAndExpandBlock4HL_16
@@ -2154,8 +1828,6 @@ class GSBlock
{
//printf("ReadAndExpandBlock4HH_32\n");
- #if _M_SSE >= 0x401
-
const GSVector4i* s = (const GSVector4i*)src;
GSVector4i v0, v1, v2, v3;
@@ -2179,16 +1851,6 @@ class GSBlock
dst += dstpitch;
}
-
- #else
-
- alignas(32) uint32 block[8 * 8];
-
- ReadBlock32(src, (uint8*)block, sizeof(block) / 8);
-
- ExpandBlock4HH_32(block, dst, dstpitch, pal);
-
- #endif
}
// TODO: ReadAndExpandBlock4HH_16
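
With the scalar fallbacks removed, every ReadAndExpandBlock* path above now relies on the SSE4.1 gather helpers to turn texel indices into palette colors. For reference, a standalone sketch (illustrative only, not GSdx code; the function name is an invention) of what a gather32_32-style lookup reduces to at the intrinsic level:

    #include <smmintrin.h> // SSE4.1
    #include <stdint.h>

    // Read four 32-bit indices with pextrd, do four ordinary table loads from the
    // palette, and reassemble the results with pinsrd.
    static inline __m128i palette_gather32_32(__m128i idx, const uint32_t* pal)
    {
        __m128i v = _mm_cvtsi32_si128((int)pal[(uint32_t)_mm_extract_epi32(idx, 0)]);
        v = _mm_insert_epi32(v, (int)pal[(uint32_t)_mm_extract_epi32(idx, 1)], 1);
        v = _mm_insert_epi32(v, (int)pal[(uint32_t)_mm_extract_epi32(idx, 2)], 2);
        v = _mm_insert_epi32(v, (int)pal[(uint32_t)_mm_extract_epi32(idx, 3)], 3);
        return v;
    }
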
diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp
index ccea31e96b00a..cdaee1f341126 100644
--- a/plugins/GSdx/GSState.cpp
+++ b/plugins/GSdx/GSState.cpp
@@ -2734,11 +2734,7 @@ __forceinline void GSState::VertexKick(uint32 skip)
GSVector4i xy = v1.xxxx().u16to32().sub32(m_ofxy);
-#if _M_SSE >= 0x401
GSVector4i::storel(&m_vertex.xy[xy_tail & 3], xy.blend16<0xf0>(xy.sra32(4)).ps32());
-#else
- GSVector4i::storel(&m_vertex.xy[xy_tail & 3], xy.upl64(xy.sra32(4).zwzw()).ps32());
-#endif
m_vertex.tail = ++tail;
m_vertex.xy_tail = ++xy_tail;
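
The kept blend16<0xf0> line and the removed upl64 fallback build the same intermediate vector { xy.x, xy.y, (xy >> 4).z, (xy >> 4).w } before the signed 32-to-16 pack and the 64-bit store. A standalone sketch of the two forms with raw intrinsics (illustrative only, not GSdx code):

    #include <smmintrin.h>

    static inline __m128i pack_xy_sse41(__m128i xy)
    {
        __m128i s = _mm_srai_epi32(xy, 4);                          // xy.sra32(4)
        __m128i m = _mm_blend_epi16(xy, s, 0xf0);                   // low 64 bits from xy, high 64 bits from s
        return _mm_packs_epi32(m, m);                               // .ps32()
    }

    static inline __m128i pack_xy_sse2_fallback(__m128i xy)
    {
        __m128i s  = _mm_srai_epi32(xy, 4);
        __m128i zw = _mm_shuffle_epi32(s, _MM_SHUFFLE(3, 2, 3, 2)); // .zwzw()
        __m128i m  = _mm_unpacklo_epi64(xy, zw);                    // .upl64()
        return _mm_packs_epi32(m, m);
    }
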
diff --git a/plugins/GSdx/GSUtil.cpp b/plugins/GSdx/GSUtil.cpp
index b5f0f06d2c807..08f8a352cd4d2 100644
--- a/plugins/GSdx/GSUtil.cpp
+++ b/plugins/GSdx/GSUtil.cpp
@@ -85,10 +85,6 @@ const char* GSUtil::GetLibName()
"AVX", sw_sse
#elif _M_SSE >= 0x401
"SSE4.1", sw_sse
-#elif _M_SSE >= 0x301
- "SSSE3", sw_sse
-#elif _M_SSE >= 0x200
- "SSE2", sw_sse
#endif
);
@@ -221,13 +217,7 @@ bool GSUtil::CheckSSE()
};
ISA checks[] = {
- {Xbyak::util::Cpu::tSSE2, "SSE2"},
-#if _M_SSE >= 0x301
- {Xbyak::util::Cpu::tSSSE3, "SSSE3"},
-#endif
-#if _M_SSE >= 0x401
{Xbyak::util::Cpu::tSSE41, "SSE41"},
-#endif
#if _M_SSE >= 0x500
{Xbyak::util::Cpu::tAVX, "AVX1"},
#endif
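
After this change CheckSSE only needs to verify SSE4.1, plus AVX/AVX2 when the build targets them. A minimal standalone sketch of that runtime gate using Xbyak's CPU detection; the include path and function name are assumptions, only Xbyak::util::Cpu and tSSE41 come from the code above:

    #include <cstdio>
    #include "xbyak_util.h" // adjust to wherever Xbyak lives in the tree

    static bool check_sse41()
    {
        Xbyak::util::Cpu cpu;

        if(!cpu.has(Xbyak::util::Cpu::tSSE41))
        {
            fprintf(stderr, "SSE4.1 is not supported by this CPU, but GSdx now requires it\n");
            return false;
        }

        return true;
    }
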
diff --git a/plugins/GSdx/GSVector4.h b/plugins/GSdx/GSVector4.h
index f97a8db817ae4..77422cb351c99 100644
--- a/plugins/GSdx/GSVector4.h
+++ b/plugins/GSdx/GSVector4.h
@@ -250,33 +250,7 @@ class alignas(16) GSVector4
template<int mode> __forceinline GSVector4 round() const
{
- #if _M_SSE >= 0x401
-
return GSVector4(_mm_round_ps(m, mode));
-
- #else
-
- GSVector4 a = *this;
-
- GSVector4 b = (a & cast(GSVector4i::x80000000())) | m_x4b000000;
-
- b = a + b - b;
-
- if((mode & 7) == (Round_NegInf & 7))
- {
- return b - ((a < b) & m_one);
- }
-
- if((mode & 7) == (Round_PosInf & 7))
- {
- return b + ((a > b) & m_one);
- }
-
- ASSERT((mode & 7) == (Round_NearestInt & 7)); // other modes aren't implemented
-
- return b;
-
- #endif
}
__forceinline GSVector4 floor() const
@@ -404,65 +378,29 @@ class alignas(16) GSVector4
__forceinline GSVector4 hadd() const
{
- #if _M_SSE >= 0x300
-
return GSVector4(_mm_hadd_ps(m, m));
-
- #else
-
- return xzxz() + ywyw();
-
- #endif
}
__forceinline GSVector4 hadd(const GSVector4& v) const
{
- #if _M_SSE >= 0x300
-
return GSVector4(_mm_hadd_ps(m, v.m));
-
- #else
-
- return xzxz(v) + ywyw(v);
-
- #endif
}
__forceinline GSVector4 hsub() const
{
- #if _M_SSE >= 0x300
-
return GSVector4(_mm_hsub_ps(m, m));
-
- #else
-
- return xzxz() - ywyw();
-
- #endif
}
__forceinline GSVector4 hsub(const GSVector4& v) const
{
- #if _M_SSE >= 0x300
-
return GSVector4(_mm_hsub_ps(m, v.m));
-
- #else
-
- return xzxz(v) - ywyw(v);
-
- #endif
}
- #if _M_SSE >= 0x401
-
template<int i> __forceinline GSVector4 dp(const GSVector4& v) const
{
return GSVector4(_mm_dp_ps(m, v.m, i));
}
- #endif
-
__forceinline GSVector4 sat(const GSVector4& a, const GSVector4& b) const
{
return GSVector4(_mm_min_ps(_mm_max_ps(m, a), b));
@@ -493,26 +431,14 @@ class alignas(16) GSVector4
return GSVector4(_mm_max_ps(m, a));
}
- #if _M_SSE >= 0x401
-
template<int mask> __forceinline GSVector4 blend32(const GSVector4& a) const
{
return GSVector4(_mm_blend_ps(m, a, mask));
}
- #endif
-
__forceinline GSVector4 blend32(const GSVector4& a, const GSVector4& mask) const
{
- #if _M_SSE >= 0x401
-
return GSVector4(_mm_blendv_ps(m, a, mask));
-
- #else
-
- return GSVector4(_mm_or_ps(_mm_andnot_ps(mask, m), _mm_and_ps(mask, a)));
-
- #endif
}
__forceinline GSVector4 upl(const GSVector4& a) const
@@ -566,16 +492,10 @@ class alignas(16) GSVector4
return _mm_testz_ps(m, m) != 0;
- #elif _M_SSE >= 0x401
-
__m128i a = _mm_castps_si128(m);
return _mm_testz_si128(a, a) != 0;
- #else
-
- return mask() == 0;
-
#endif
}
@@ -643,7 +563,6 @@ class alignas(16) GSVector4
}
#endif
-
}
#ifdef __linux__
@@ -663,28 +582,12 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
template<int index> __forceinline int extract32() const
{
- #if _M_SSE >= 0x401
-
return _mm_extract_ps(m, index);
-
- #else
-
- return i32[index];
-
- #endif
}
#else
template<int i> __forceinline int extract32() const
{
- #if _M_SSE >= 0x401
-
return _mm_extract_ps(m, i);
-
- #else
-
- return i32[i];
-
- #endif
}
#endif
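
round<mode>() is now a thin wrapper over _mm_round_ps, with the add/subtract bit-trick fallback gone. For reference, a standalone sketch of the raw SSE4.1 equivalents of the three modes the old fallback handled (illustrative only):

    #include <smmintrin.h>

    static inline __m128 round_nearest(__m128 v) { return _mm_round_ps(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); }
    static inline __m128 round_neg_inf(__m128 v) { return _mm_round_ps(v, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); } // floor
    static inline __m128 round_pos_inf(__m128 v) { return _mm_round_ps(v, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); } // ceil
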
diff --git a/plugins/GSdx/GSVector4i.h b/plugins/GSdx/GSVector4i.h
index db376cdd57afe..e3c6dc5808d35 100644
--- a/plugins/GSdx/GSVector4i.h
+++ b/plugins/GSdx/GSVector4i.h
@@ -229,15 +229,7 @@ class alignas(16) GSVector4i
__forceinline GSVector4i runion_ordered(const GSVector4i& a) const
{
- #if _M_SSE >= 0x401
-
return min_i32(a).upl64(max_i32(a).srl<8>());
-
- #else
-
- return GSVector4i(std::min(x, a.x), std::min(y, a.y), std::max(z, a.z), std::max(w, a.w));
-
- #endif
}
__forceinline GSVector4i rintersect(const GSVector4i& a) const
@@ -295,8 +287,6 @@ class alignas(16) GSVector4i
return (uint32)store(v);
}
- #if _M_SSE >= 0x401
-
__forceinline GSVector4i sat_i8(const GSVector4i& a, const GSVector4i& b) const
{
return max_i8(a).min_i8(b);
@@ -307,8 +297,6 @@ class alignas(16) GSVector4i
return max_i8(a.xyxy()).min_i8(a.zwzw());
}
- #endif
-
__forceinline GSVector4i sat_i16(const GSVector4i& a, const GSVector4i& b) const
{
return max_i16(a).min_i16(b);
@@ -319,8 +307,6 @@ class alignas(16) GSVector4i
return max_i16(a.xyxy()).min_i16(a.zwzw());
}
- #if _M_SSE >= 0x401
-
__forceinline GSVector4i sat_i32(const GSVector4i& a, const GSVector4i& b) const
{
return max_i32(a).min_i32(b);
@@ -331,34 +317,6 @@ class alignas(16) GSVector4i
return max_i32(a.xyxy()).min_i32(a.zwzw());
}
- #else
-
- __forceinline GSVector4i sat_i32(const GSVector4i& a, const GSVector4i& b) const
- {
- GSVector4i v;
-
- v.x = std::min(std::max(x, a.x), b.x);
- v.y = std::min(std::max(y, a.y), b.y);
- v.z = std::min(std::max(z, a.z), b.z);
- v.w = std::min(std::max(w, a.w), b.w);
-
- return v;
- }
-
- __forceinline GSVector4i sat_i32(const GSVector4i& a) const
- {
- GSVector4i v;
-
- v.x = std::min(std::max(x, a.x), a.z);
- v.y = std::min(std::max(y, a.y), a.w);
- v.z = std::min(std::max(z, a.x), a.z);
- v.w = std::min(std::max(w, a.y), a.w);
-
- return v;
- }
-
- #endif
-
__forceinline GSVector4i sat_u8(const GSVector4i& a, const GSVector4i& b) const
{
return max_u8(a).min_u8(b);
@@ -369,8 +327,6 @@ class alignas(16) GSVector4i
return max_u8(a.xyxy()).min_u8(a.zwzw());
}
- #if _M_SSE >= 0x401
-
__forceinline GSVector4i sat_u16(const GSVector4i& a, const GSVector4i& b) const
{
return max_u16(a).min_u16(b);
@@ -381,10 +337,6 @@ class alignas(16) GSVector4i
return max_u16(a.xyxy()).min_u16(a.zwzw());
}
- #endif
-
- #if _M_SSE >= 0x401
-
__forceinline GSVector4i sat_u32(const GSVector4i& a, const GSVector4i& b) const
{
return max_u32(a).min_u32(b);
@@ -395,10 +347,6 @@ class alignas(16) GSVector4i
return max_u32(a.xyxy()).min_u32(a.zwzw());
}
- #endif
-
- #if _M_SSE >= 0x401
-
__forceinline GSVector4i min_i8(const GSVector4i& a) const
{
return GSVector4i(_mm_min_epi8(m, a));
@@ -409,8 +357,6 @@ class alignas(16) GSVector4i
return GSVector4i(_mm_max_epi8(m, a));
}
- #endif
-
__forceinline GSVector4i min_i16(const GSVector4i& a) const
{
return GSVector4i(_mm_min_epi16(m, a));
@@ -421,8 +367,6 @@ class alignas(16) GSVector4i
return GSVector4i(_mm_max_epi16(m, a));
}
- #if _M_SSE >= 0x401
-
__forceinline GSVector4i min_i32(const GSVector4i& a) const
{
return GSVector4i(_mm_min_epi32(m, a));
@@ -433,8 +377,6 @@ class alignas(16) GSVector4i
return GSVector4i(_mm_max_epi32(m, a));
}
- #endif
-
__forceinline GSVector4i min_u8(const GSVector4i& a) const
{
return GSVector4i(_mm_min_epu8(m, a));
@@ -445,8 +387,6 @@ class alignas(16) GSVector4i
return GSVector4i(_mm_max_epu8(m, a));
}
- #if _M_SSE >= 0x401
-
__forceinline GSVector4i min_u16(const GSVector4i& a) const
{
return GSVector4i(_mm_min_epu16(m, a));
@@ -467,8 +407,6 @@ class alignas(16) GSVector4i
return GSVector4i(_mm_max_epu32(m, a));
}
- #endif
-
__forceinline static int min_i16(int a, int b)
{
return store(load(a).min_i16(load(b)));
@@ -481,26 +419,14 @@ class alignas(16) GSVector4i
__forceinline GSVector4i blend8(const GSVector4i& a, const GSVector4i& mask) const
{
- #if _M_SSE >= 0x401
-
return GSVector4i(_mm_blendv_epi8(m, a, mask));
-
- #else
-
- return GSVector4i(_mm_or_si128(_mm_andnot_si128(mask, m), _mm_and_si128(mask, a)));
-
- #endif
}
- #if _M_SSE >= 0x401
-
template<int mask> __forceinline GSVector4i blend16(const GSVector4i& a) const
{
return GSVector4i(_mm_blend_epi16(m, a, mask));
}
- #endif
-
#if _M_SSE >= 0x501
template<int mask> __forceinline GSVector4i blend32(const GSVector4i& v) const
@@ -517,26 +443,14 @@ class alignas(16) GSVector4i
__forceinline GSVector4i mix16(const GSVector4i& a) const
{
- #if _M_SSE >= 0x401
-
return blend16<0xaa>(a);
-
- #else
-
- return blend8(a, GSVector4i::xffff0000());
-
- #endif
}
- #if _M_SSE >= 0x301
-
__forceinline GSVector4i shuffle8(const GSVector4i& mask) const
{
return GSVector4i(_mm_shuffle_epi8(m, mask));
}
- #endif
-
__forceinline GSVector4i ps16(const GSVector4i& a) const
{
return GSVector4i(_mm_packs_epi16(m, a));
@@ -567,8 +481,6 @@ class alignas(16) GSVector4i
return GSVector4i(_mm_packs_epi32(m, m));
}
- #if _M_SSE >= 0x401
-
__forceinline GSVector4i pu32(const GSVector4i& a) const
{
return GSVector4i(_mm_packus_epi32(m, a));
@@ -579,8 +491,6 @@ class alignas(16) GSVector4i
return GSVector4i(_mm_packus_epi32(m, m));
}
- #endif
-
__forceinline GSVector4i upl8(const GSVector4i& a) const
{
return GSVector4i(_mm_unpacklo_epi8(m, a));
@@ -685,8 +595,6 @@ class alignas(16) GSVector4i
return GSVector4i(_mm_unpackhi_epi64(m, _mm_setzero_si128()));
}
- #if _M_SSE >= 0x401
-
// WARNING!!!
//
// MSVC (2008, 2010 ctp) believes that there is a "mem, reg" form of the pmovz/sx* instructions,
@@ -752,50 +660,6 @@ class alignas(16) GSVector4i
return GSVector4i(_mm_cvtepu32_epi64(m));
}
- #else
-
- __forceinline GSVector4i u8to16() const
- {
- return upl8();
- }
-
- __forceinline GSVector4i u8to32() const
- {
- return upl8().upl16();
- }
-
- __forceinline GSVector4i u8to64() const
- {
- return upl8().upl16().upl32();
- }
-
- __forceinline GSVector4i u16to32() const
- {
- return upl16();
- }
-
- __forceinline GSVector4i u16to64() const
- {
- return upl16().upl32();
- }
-
- __forceinline GSVector4i u32to64() const
- {
- return upl32();
- }
-
- __forceinline GSVector4i i8to16() const
- {
- return zero().upl8(*this).sra16(8);
- }
-
- __forceinline GSVector4i i16to32() const
- {
- return zero().upl16(*this).sra32(16);
- }
-
- #endif
-
template<int i> __forceinline GSVector4i srl() const
{
return GSVector4i(_mm_srli_si128(m, i));
@@ -803,20 +667,7 @@ class alignas(16) GSVector4i
template<int i> __forceinline GSVector4i srl(const GSVector4i& v)
{
- #if _M_SSE >= 0x301
-
return GSVector4i(_mm_alignr_epi8(v.m, m, i));
-
- #else
-
- // The `& 0xF` keeps the compiler happy on cases that won't actually be hit
- if(i == 0) return *this;
- else if(i < 16) return srl() | v.sll<(16 - i) & 0xF>();
- else if(i == 16) return v;
- else if(i < 32) return v.srl<(i - 16) & 0xF>();
- else return zero();
-
- #endif
}
template<int i> __forceinline GSVector4i sll() const
@@ -1013,15 +864,11 @@ class alignas(16) GSVector4i
return GSVector4i(_mm_mullo_epi16(m, v.m));
}
- #if _M_SSE >= 0x301
-
__forceinline GSVector4i mul16hrs(const GSVector4i& v) const
{
return GSVector4i(_mm_mulhrs_epi16(m, v.m));
}
- #endif
-
GSVector4i madd(const GSVector4i& v) const
{
return GSVector4i(_mm_madd_epi16(m, v.m));
@@ -1073,21 +920,11 @@ class alignas(16) GSVector4i
__forceinline bool eq(const GSVector4i& v) const
{
- #if _M_SSE >= 0x401
-
// pxor, ptest, je
GSVector4i t = *this ^ v;
return _mm_testz_si128(t, t) != 0;
-
- #else
-
- // pcmpeqd, pmovmskb, cmp, je
-
- return eq32(v).alltrue();
-
- #endif
}
__forceinline GSVector4i eq8(const GSVector4i& v) const
@@ -1167,37 +1004,17 @@ class alignas(16) GSVector4i
__forceinline bool allfalse() const
{
- #if _M_SSE >= 0x401
-
return _mm_testz_si128(m, m) != 0;
-
- #else
-
- return mask() == 0;
-
- #endif
}
- #if _M_SSE >= 0x401
-
template<int i> __forceinline GSVector4i insert8(int a) const
{
return GSVector4i(_mm_insert_epi8(m, a, i));
}
- #endif
-
template<int i> __forceinline int extract8() const
{
- #if _M_SSE >= 0x401
-
return _mm_extract_epi8(m, i);
-
- #else
-
- return (int)u8[i];
-
- #endif
}
template<int i> __forceinline GSVector4i insert16(int a) const
@@ -1210,59 +1027,34 @@ class alignas(16) GSVector4i
return _mm_extract_epi16(m, i);
}
- #if _M_SSE >= 0x401
-
template<int i> __forceinline GSVector4i insert32(int a) const
{
return GSVector4i(_mm_insert_epi32(m, a, i));
}
- #endif
-
template<int i> __forceinline int extract32() const
{
if(i == 0) return GSVector4i::store(*this);
- #if _M_SSE >= 0x401
-
return _mm_extract_epi32(m, i);
-
- #else
-
- return i32[i];
-
- #endif
}
#ifdef _M_AMD64
- #if _M_SSE >= 0x401
-
template<int i> __forceinline GSVector4i insert64(int64 a) const
{
return GSVector4i(_mm_insert_epi64(m, a, i));
}
- #endif
-
template<int i> __forceinline int64 extract64() const
{
if(i == 0) return GSVector4i::storeq(*this);
- #if _M_SSE >= 0x401
-
return _mm_extract_epi64(m, i);
-
- #else
-
- return i64[i];
-
- #endif
}
#endif
- #if _M_SSE >= 0x401
template<class T> __forceinline GSVector4i gather8_4(const T* ptr) const
{
@@ -1340,8 +1132,6 @@ class alignas(16) GSVector4i
return v;
}
- #endif
-
template<class T> __forceinline GSVector4i gather16_4(const T* ptr) const
{
GSVector4i v;
@@ -1418,8 +1208,6 @@ class alignas(16) GSVector4i
return v;
}
- #if _M_SSE >= 0x401
-
template<class T> __forceinline GSVector4i gather32_4(const T* ptr) const
{
GSVector4i v;
@@ -1479,56 +1267,7 @@ class alignas(16) GSVector4i
return v;
}
- #else
-
- template<class T> __forceinline GSVector4i gather32_4(const T* ptr) const
- {
- return GSVector4i(
- (int)ptr[extract8<0>() & 0xf],
- (int)ptr[extract8<0>() >> 4],
- (int)ptr[extract8<1>() & 0xf],
- (int)ptr[extract8<1>() >> 4]);
- }
-
- template<class T> __forceinline GSVector4i gather32_8(const T* ptr) const
- {
- return GSVector4i(
- (int)ptr[extract8<0>()],
- (int)ptr[extract8<1>()],
- (int)ptr[extract8<2>()],
- (int)ptr[extract8<3>()]);
- }
-
- template<class T> __forceinline GSVector4i gather32_16(const T* ptr) const
- {
- return GSVector4i(
- (int)ptr[extract16<0>()],
- (int)ptr[extract16<1>()],
- (int)ptr[extract16<2>()],
- (int)ptr[extract16<3>()]);
- }
-
- template<class T> __forceinline GSVector4i gather32_32(const T* ptr) const
- {
- return GSVector4i(
- (int)ptr[extract32<0>()],
- (int)ptr[extract32<1>()],
- (int)ptr[extract32<2>()],
- (int)ptr[extract32<3>()]);
- }
-
- template<class T1, class T2> __forceinline GSVector4i gather32_32(const T1* ptr1, const T2* ptr2) const
- {
- return GSVector4i(
- (int)ptr2[ptr1[extract32<0>()]],
- (int)ptr2[ptr1[extract32<1>()]],
- (int)ptr2[ptr1[extract32<2>()]],
- (int)ptr2[ptr1[extract32<3>()]]);
- }
-
- #endif
-
- #if defined(_M_AMD64) && _M_SSE >= 0x401
+ #if defined(_M_AMD64)
template<class T> __forceinline GSVector4i gather64_4(const T* ptr) const
{
@@ -1620,8 +1359,6 @@ class alignas(16) GSVector4i
#endif
- #if _M_SSE >= 0x401
-
template<class T> __forceinline void gather8_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const
{
dst[0] = gather8_4<0>(ptr);
@@ -1633,8 +1370,6 @@ class alignas(16) GSVector4i
dst[0] = gather8_8<>(ptr);
}
- #endif
-
template<class T> __forceinline void gather16_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const
{
dst[0] = gather16_4<0>(ptr);
@@ -1742,15 +1477,7 @@ class alignas(16) GSVector4i
__forceinline static GSVector4i loadnt(const void* p)
{
- #if _M_SSE >= 0x401
-
return GSVector4i(_mm_stream_load_si128((__m128i*)p));
-
- #else
-
- return GSVector4i(_mm_load_si128((__m128i*)p));
-
- #endif
}
__forceinline static GSVector4i loadl(const void* p)
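
mix16 shows the pattern applied throughout GSVector4i.h: one SSE4.1 instruction replaces a three-op mask blend. A standalone sketch of the kept and removed forms with raw intrinsics (illustrative only; the lane behaviour is identical):

    #include <smmintrin.h>

    // Keep the low 16 bits of every 32-bit lane of 'lo', take the high 16 bits from 'hi'.
    static inline __m128i mix16_sse41(__m128i lo, __m128i hi)
    {
        return _mm_blend_epi16(lo, hi, 0xaa);                  // words 1, 3, 5, 7 from hi
    }

    static inline __m128i mix16_sse2_fallback(__m128i lo, __m128i hi)
    {
        const __m128i mask = _mm_set1_epi32((int)0xffff0000);  // select the high half of each dword
        return _mm_or_si128(_mm_andnot_si128(mask, lo), _mm_and_si128(mask, hi));
    }
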
diff --git a/plugins/GSdx/GSdx.vcxproj b/plugins/GSdx/GSdx.vcxproj
index a65963c9c5c93..19e9af6a1fbf7 100644
--- a/plugins/GSdx/GSdx.vcxproj
+++ b/plugins/GSdx/GSdx.vcxproj
@@ -17,14 +17,6 @@
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
- <ProjectConfiguration Include="Debug SSE4|Win32">
- <Configuration>Debug SSE4</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="Debug SSE4|x64">
- <Configuration>Debug SSE4</Configuration>
- <Platform>x64</Platform>
- </ProjectConfiguration>
<ProjectConfiguration Include="Release AVX2|Win32">
<Configuration>Release AVX2</Configuration>
<Platform>Win32</Platform>
@@ -41,14 +33,6 @@
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
- <ProjectConfiguration Include="Release SSE4|Win32">
- <Configuration>Release SSE4</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="Release SSE4|x64">
- <Configuration>Release SSE4</Configuration>
- <Platform>x64</Platform>
- </ProjectConfiguration>
@@ -67,8 +51,7 @@
-
-
+
diff --git a/plugins/GSdx/Renderers/Common/GSVertexTrace.cpp b/plugins/GSdx/Renderers/Common/GSVertexTrace.cpp
index 4df7c0e32b658..b093811c7e19d 100644
--- a/plugins/GSdx/Renderers/Common/GSVertexTrace.cpp
+++ b/plugins/GSdx/Renderers/Common/GSVertexTrace.cpp
@@ -175,18 +175,9 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
GSVector4i cmin = GSVector4i::xffffffff();
GSVector4i cmax = GSVector4i::zero();
- #if _M_SSE >= 0x401
-
GSVector4i pmin = GSVector4i::xffffffff();
GSVector4i pmax = GSVector4i::zero();
- #else
-
- GSVector4 pmin = s_minmax.xxxx();
- GSVector4 pmax = s_minmax.yyyy();
-
- #endif
-
const GSVertex* RESTRICT v = (GSVertex*)vertex;
for(int i = 0; i < count; i += n)
@@ -233,21 +224,10 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
GSVector4i xy = xyzf.upl16();
GSVector4i z = xyzf.yyyy();
- #if _M_SSE >= 0x401
-
GSVector4i p = xy.blend16<0xf0>(z.uph32(xyzf));
pmin = pmin.min_u32(p);
pmax = pmax.max_u32(p);
-
- #else
-
- GSVector4 p = GSVector4(xy.upl64(z.srl32(1).upl32(xyzf.wwww())));
-
- pmin = pmin.min(p);
- pmax = pmax.max(p);
-
- #endif
}
else if(primclass == GS_LINE_CLASS)
{
@@ -314,23 +294,11 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
GSVector4i xy1 = xyzf1.upl16();
GSVector4i z1 = xyzf1.yyyy();
- #if _M_SSE >= 0x401
-
GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0));
GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));
pmin = pmin.min_u32(p0.min_u32(p1));
pmax = pmax.max_u32(p0.max_u32(p1));
-
- #else
-
- GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww())));
- GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));
-
- pmin = pmin.min(p0.min(p1));
- pmax = pmax.max(p0.max(p1));
-
- #endif
}
else if(primclass == GS_TRIANGLE_CLASS)
{
@@ -406,25 +374,12 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
GSVector4i xy2 = xyzf2.upl16();
GSVector4i z2 = xyzf2.yyyy();
- #if _M_SSE >= 0x401
-
GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0));
GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));
GSVector4i p2 = xy2.blend16<0xf0>(z2.uph32(xyzf2));
pmin = pmin.min_u32(p2).min_u32(p0.min_u32(p1));
pmax = pmax.max_u32(p2).max_u32(p0.max_u32(p1));
-
- #else
-
- GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww())));
- GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));
- GSVector4 p2 = GSVector4(xy2.upl64(z2.srl32(1).upl32(xyzf2.wwww())));
-
- pmin = pmin.min(p2).min(p0.min(p1));
- pmax = pmax.max(p2).max(p0.max(p1));
-
- #endif
}
else if(primclass == GS_SPRITE_CLASS)
{
@@ -491,23 +446,11 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
GSVector4i xy1 = xyzf1.upl16();
GSVector4i z1 = xyzf1.yyyy();
- #if _M_SSE >= 0x401
-
GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf1));
GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));
pmin = pmin.min_u32(p0.min_u32(p1));
pmax = pmax.max_u32(p0.max_u32(p1));
-
- #else
-
- GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf1.wwww())));
- GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));
-
- pmin = pmin.min(p0.min(p1));
- pmax = pmax.max(p0.max(p1));
-
- #endif
}
}
@@ -516,13 +459,9 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
// be true if depth isn't constant but close enough. It also implies that
// pmin.z & 1 == 0 and pmax.z & 1 == 0
- #if _M_SSE >= 0x401
-
pmin = pmin.blend16<0x30>(pmin.srl32(1));
pmax = pmax.blend16<0x30>(pmax.srl32(1));
- #endif
-
GSVector4 o(context->XYOFFSET);
GSVector4 s(1.0f / 16, 1.0f / 16, 2.0f, 1.0f);
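
With the float fallback gone, FindMinMax keeps pmin/pmax as integer vectors for every primitive class and updates them with unsigned 32-bit min/max. A standalone sketch of the per-vertex update (illustrative only):

    #include <smmintrin.h> // SSE4.1: pminud / pmaxud

    static inline void update_bounds(__m128i p, __m128i& pmin, __m128i& pmax)
    {
        // pmin starts at all-ones and pmax at zero, as in the hunks above.
        pmin = _mm_min_epu32(pmin, p);
        pmax = _mm_max_epu32(pmax, p);
    }
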
diff --git a/plugins/GSdx/Renderers/DX11/GSRendererDX11.cpp b/plugins/GSdx/Renderers/DX11/GSRendererDX11.cpp
index 91726a8490ea7..842e4e329f703 100644
--- a/plugins/GSdx/Renderers/DX11/GSRendererDX11.cpp
+++ b/plugins/GSdx/Renderers/DX11/GSRendererDX11.cpp
@@ -944,12 +944,8 @@ void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sou
m_ps_sel.fog = 1;
GSVector4 fc = GSVector4::rgba32(m_env.FOGCOL.u32[0]);
-#if _M_SSE >= 0x401
// Blend AREF to avoid loading a random value for alpha (dirty cache)
ps_cb.FogColor_AREF = fc.blend32<8>(ps_cb.FogColor_AREF);
-#else
- ps_cb.FogColor_AREF = fc;
-#endif
}
// Warning must be done after EmulateZbuffer
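
fc.blend32<8>(ps_cb.FogColor_AREF) keeps the fog RGB in the x/y/z lanes and preserves the previously written AREF in the w lane (blend mask bit 3), which is why the plain-assignment fallback could be dropped. A standalone intrinsic-level sketch (illustrative only):

    #include <smmintrin.h>

    static inline __m128 merge_fog_aref(__m128 fog_rgb, __m128 prev_fogcolor_aref)
    {
        return _mm_blend_ps(fog_rgb, prev_fogcolor_aref, 8); // lane 3 (AREF) from prev, lanes 0-2 from fog
    }
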
diff --git a/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.cpp b/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.cpp
index 6173e291d8f59..57657f6b1ab48 100644
--- a/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.cpp
+++ b/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.cpp
@@ -1180,12 +1180,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
m_ps_sel.fog = 1;
GSVector4 fc = GSVector4::rgba32(m_env.FOGCOL.u32[0]);
-#if _M_SSE >= 0x401
// Blend AREF to avoid loading a random value for alpha (dirty cache)
ps_cb.FogColor_AREF = fc.blend32<8>(ps_cb.FogColor_AREF);
-#else
- ps_cb.FogColor_AREF = fc;
-#endif
}
// Warning must be done after EmulateZbuffer
diff --git a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.cpp b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.cpp
index df6391f10b4df..1508978081570 100644
--- a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.cpp
+++ b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.cpp
@@ -121,18 +121,7 @@ void GSDrawScanlineCodeGenerator::mix16(const Xmm& a, const Xmm& b, const Xmm& t
}
else
{
- if(m_cpu.has(util::Cpu::tSSE41))
- {
- pblendw(a, b, 0xaa);
- }
- else
- {
- pcmpeqd(temp, temp);
- psrld(temp, 16);
- pand(a, temp);
- pandn(temp, b);
- por(a, temp);
- }
+ pblendw(a, b, 0xaa);
}
}
@@ -154,17 +143,8 @@ void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp)
}
else
{
- if(m_cpu.has(util::Cpu::tSSE41))
- {
- packuswb(a, a);
- pmovzxbw(a, a);
- }
- else
- {
- packuswb(a, a);
- pxor(temp, temp);
- punpcklbw(a, temp);
- }
+ packuswb(a, a);
+ pmovzxbw(a, a);
}
}
@@ -223,10 +203,8 @@ void GSDrawScanlineCodeGenerator::blend8(const Xmm& a, const Xmm& b)
{
if(m_cpu.has(util::Cpu::tAVX))
vpblendvb(a, a, b, xmm0);
- else if(m_cpu.has(util::Cpu::tSSE41))
+ else
pblendvb(a, b);
- else
- blend(a, b, xmm0);
}
void GSDrawScanlineCodeGenerator::blend8r(const Xmm& b, const Xmm& a)
@@ -235,15 +213,11 @@ void GSDrawScanlineCodeGenerator::blend8r(const Xmm& b, const Xmm& a)
{
vpblendvb(b, a, b, xmm0);
}
- else if(m_cpu.has(util::Cpu::tSSE41))
+ else
{
pblendvb(a, b);
movdqa(b, a);
}
- else
- {
- blendr(b, a, xmm0);
- }
}
void GSDrawScanlineCodeGenerator::split16_2x8(const Xmm& l, const Xmm& h, const Xmm& src)
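
The generator now always emits the SSE4.1 sequences; clamp16, for example, becomes packuswb followed by pmovzxbw. A standalone sketch of what that pair computes (illustrative only):

    #include <smmintrin.h>

    // Saturate each signed 16-bit lane to the 0..255 range and widen back to 16 bits.
    static inline __m128i clamp16_to_u8(__m128i v)
    {
        __m128i packed = _mm_packus_epi16(v, v); // packuswb: signed 16 -> unsigned 8 with saturation
        return _mm_cvtepu8_epi16(packed);        // pmovzxbw: zero-extend the low 8 bytes
    }
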
diff --git a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x86.cpp b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x86.cpp
index 78711e48a8cbf..ea59e11cf88d9 100644
--- a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x86.cpp
+++ b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x86.cpp
@@ -644,20 +644,9 @@ void GSDrawScanlineCodeGenerator::TestZ_SSE(const Xmm& temp1, const Xmm& temp2)
// Clamp Z to ZPSM_FMT_MAX
if (m_sel.zclamp)
{
-#if _M_SSE >= 0x401
pcmpeqd(temp1, temp1);
psrld(temp1, (uint8)((m_sel.zpsm & 0x3) * 8));
pminsd(xmm0, temp1);
-#else
- pcmpeqd(temp1, temp1);
- psrld(temp1, (uint8)((m_sel.zpsm & 0x3) * 8));
- pcmpgtd(temp1, xmm0);
- pand(xmm0, temp1);
- pcmpeqd(temp2, temp2);
- pxor(temp1, temp2);
- psrld(temp1, (uint8)((m_sel.zpsm & 0x3) * 8));
- por(xmm0, temp1);
-#endif
}
if(m_sel.zwrite)
@@ -1089,15 +1078,7 @@ void GSDrawScanlineCodeGenerator::Wrap_SSE(const Xmm& uv0, const Xmm& uv1)
movdqa(xmm4, ptr[&m_local.gd->t.min]);
movdqa(xmm5, ptr[&m_local.gd->t.max]);
- if(m_cpu.has(util::Cpu::tSSE41))
- {
- movdqa(xmm0, ptr[&m_local.gd->t.mask]);
- }
- else
- {
- movdqa(xmm0, ptr[&m_local.gd->t.invmask]);
- movdqa(xmm6, xmm0);
- }
+ movdqa(xmm0, ptr[&m_local.gd->t.mask]);
// uv0
@@ -1118,11 +1099,7 @@ void GSDrawScanlineCodeGenerator::Wrap_SSE(const Xmm& uv0, const Xmm& uv1)
pminsw(uv0, xmm5);
// clamp.blend8(repeat, m_local.gd->t.mask);
-
- if(m_cpu.has(util::Cpu::tSSE41))
- pblendvb(uv0, xmm1);
- else
- blendr(uv0, xmm1, xmm0);
+ pblendvb(uv0, xmm1);
// uv1
@@ -1143,11 +1120,7 @@ void GSDrawScanlineCodeGenerator::Wrap_SSE(const Xmm& uv0, const Xmm& uv1)
pminsw(uv1, xmm5);
// clamp.blend8(repeat, m_local.gd->t.mask);
-
- if(m_cpu.has(util::Cpu::tSSE41))
- pblendvb(uv1, xmm1);
- else
- blendr(uv1, xmm1, xmm6);
+ pblendvb(uv1, xmm1);
}
}
@@ -1908,15 +1881,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD_SSE(const Xmm& uv0, const Xmm& uv1)
}
else
{
- if(m_cpu.has(util::Cpu::tSSE41))
- {
- movdqa(xmm0, ptr[&m_local.gd->t.mask]);
- }
- else
- {
- movdqa(xmm0, ptr[&m_local.gd->t.invmask]);
- movdqa(xmm4, xmm0);
- }
+ movdqa(xmm0, ptr[&m_local.gd->t.mask]);
// uv0
@@ -1937,11 +1902,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD_SSE(const Xmm& uv0, const Xmm& uv1)
pminsw(uv0, xmm6);
// clamp.blend8(repeat, m_local.gd->t.mask);
-
- if(m_cpu.has(util::Cpu::tSSE41))
- pblendvb(uv0, xmm1);
- else
- blendr(uv0, xmm1, xmm0);
+ pblendvb(uv0, xmm1);
// uv1
@@ -1963,10 +1924,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD_SSE(const Xmm& uv0, const Xmm& uv1)
// clamp.blend8(repeat, m_local.gd->t.mask);
- if(m_cpu.has(util::Cpu::tSSE41))
- pblendvb(uv1, xmm1);
- else
- blendr(uv1, xmm1, xmm4);
+ pblendvb(uv1, xmm1);
}
}
@@ -2435,20 +2393,9 @@ void GSDrawScanlineCodeGenerator::WriteZBuf_SSE()
// Clamp Z to ZPSM_FMT_MAX
if (m_sel.zclamp)
{
-#if _M_SSE >= 0x401
pcmpeqd(xmm7, xmm7);
psrld(xmm7, (uint8)((m_sel.zpsm & 0x3) * 8));
pminsd(xmm1, xmm7);
-#else
- static GSVector4i all_1s = GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff);
- pcmpeqd(xmm7, xmm7);
- psrld(xmm7, (uint8)((m_sel.zpsm & 0x3) * 8));
- pcmpgtd(xmm7, xmm1);
- pand(xmm1, xmm7);
- pxor(xmm7, ptr[&all_1s]);
- psrld(xmm7, (uint8)((m_sel.zpsm & 0x3) * 8));
- por(xmm1, xmm7);
-#endif
}
bool fast = m_sel.ztest ? m_sel.zpsm < 2 : m_sel.zpsm == 0 && m_sel.notest;
@@ -2669,15 +2616,6 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_SSE()
if(m_sel.pabe)
{
- if(!m_cpu.has(util::Cpu::tSSE41))
- {
- // doh, previous blend8r overwrote xmm0 (sse41 uses pblendvb)
- movdqa(xmm0, xmm4);
- pslld(xmm0, 8);
- psrad(xmm0, 31);
-
- }
-
psrld(xmm0, 16); // zero out high words to select the source alpha in blend (so it also does mix16)
// ga = c[1].blend8(ga, mask).mix16(c[1]);
@@ -2862,24 +2800,13 @@ void GSDrawScanlineCodeGenerator::WritePixel_SSE(const Xmm& src, const Reg32& ad
case 0:
if(i == 0) movd(dst, src);
else {
- if(m_cpu.has(util::Cpu::tSSE41)) {
- pextrd(dst, src, i);
- } else {
- pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i));
- movd(dst, xmm0);
- }
-
+ pextrd(dst, src, i);
}
break;
case 1:
if(i == 0) movd(eax, src);
else {
- if(m_cpu.has(util::Cpu::tSSE41)) {
- pextrd(eax, src, i);
- } else {
- pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i));
- movd(eax, xmm0);
- }
+ pextrd(eax, src, i);
}
xor(eax, dst);
and(eax, 0xffffff);
@@ -2918,153 +2845,28 @@ void GSDrawScanlineCodeGenerator::ReadTexel_SSE(int pixels, int mip_offset)
if(m_sel.mmin && !m_sel.lcm)
{
- if(m_cpu.has(util::Cpu::tSSE41))
- {
-
- const int r[] = {5, 6, 2, 4, 0, 1, 3, 7};
-
- if(pixels == 4)
- {
- movdqa(ptr[&m_local.temp.test], xmm7);
- }
+ const int r[] = {5, 6, 2, 4, 0, 1, 3, 7};
- for(uint8 j = 0; j < 4; j++)
- {
- mov(ebx, ptr[&lod_i->u32[j]]);
- mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
-
- for(int i = 0; i < pixels; i++)
- {
- ReadTexel_SSE(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
- }
- }
-
- if(pixels == 4)
- {
- movdqa(xmm5, xmm7);
- movdqa(xmm7, ptr[&m_local.temp.test]);
- }
- }
- else
+ if(pixels == 4)
{
+ movdqa(ptr[&m_local.temp.test], xmm7);
+ }
- if(pixels == 4)
- {
- movdqa(ptr[&m_local.temp.test], xmm7);
-
- mov(ebx, ptr[&lod_i->u32[0]]);
- mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
-
- ReadTexel_SSE(xmm6, xmm5, 0);
- psrldq(xmm5, 4);
- ReadTexel_SSE(xmm4, xmm2, 0);
- psrldq(xmm2, 4);
-
- mov(ebx, ptr[&lod_i->u32[1]]);
- mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
-
- ReadTexel_SSE(xmm1, xmm5, 0);
- psrldq(xmm5, 4);
- ReadTexel_SSE(xmm7, xmm2, 0);
- psrldq(xmm2, 4);
-
- punpckldq(xmm6, xmm1);
- punpckldq(xmm4, xmm7);
-
- mov(ebx, ptr[&lod_i->u32[2]]);
- mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
-
- ReadTexel_SSE(xmm1, xmm5, 0);
- psrldq(xmm5, 4);
- ReadTexel_SSE(xmm7, xmm2, 0);
- psrldq(xmm2, 4);
-
- mov(ebx, ptr[&lod_i->u32[3]]);
- mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
-
- ReadTexel_SSE(xmm5, xmm5, 0);
- ReadTexel_SSE(xmm2, xmm2, 0);
-
- punpckldq(xmm1, xmm5);
- punpckldq(xmm7, xmm2);
-
- punpcklqdq(xmm6, xmm1);
- punpcklqdq(xmm4, xmm7);
-
- mov(ebx, ptr[&lod_i->u32[0]]);
- mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
-
- ReadTexel_SSE(xmm1, xmm0, 0);
- psrldq(xmm0, 4);
- ReadTexel_SSE(xmm5, xmm3, 0);
- psrldq(xmm3, 4);
-
- mov(ebx, ptr[&lod_i->u32[1]]);
- mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
-
- ReadTexel_SSE(xmm2, xmm0, 0);
- psrldq(xmm0, 4);
- ReadTexel_SSE(xmm7, xmm3, 0);
- psrldq(xmm3, 4);
-
- punpckldq(xmm1, xmm2);
- punpckldq(xmm5, xmm7);
-
- mov(ebx, ptr[&lod_i->u32[2]]);
- mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
-
- ReadTexel_SSE(xmm2, xmm0, 0);
- psrldq(xmm0, 4);
- ReadTexel_SSE(xmm7, xmm3, 0);
- psrldq(xmm3, 4);
-
- mov(ebx, ptr[&lod_i->u32[3]]);
- mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
-
- ReadTexel_SSE(xmm0, xmm0, 0);
- ReadTexel_SSE(xmm3, xmm3, 0);
-
- punpckldq(xmm2, xmm0);
- punpckldq(xmm7, xmm3);
-
- punpcklqdq(xmm1, xmm2);
- punpcklqdq(xmm5, xmm7);
+ for(uint8 j = 0; j < 4; j++)
+ {
+ mov(ebx, ptr[&lod_i->u32[j]]);
+ mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
- movdqa(xmm7, ptr[&m_local.temp.test]);
- }
- else
+ for(int i = 0; i < pixels; i++)
{
- mov(ebx, ptr[&lod_i->u32[0]]);
- mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
-
- ReadTexel_SSE(xmm6, xmm5, 0);
- psrldq(xmm5, 4); // shuffle instead? (1 2 3 0 ~ rotation)
-
- mov(ebx, ptr[&lod_i->u32[1]]);
- mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
-
- ReadTexel_SSE(xmm1, xmm5, 0);
- psrldq(xmm5, 4);
-
- punpckldq(xmm6, xmm1);
-
- mov(ebx, ptr[&lod_i->u32[2]]);
- mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
-
- ReadTexel_SSE(xmm1, xmm5, 0);
- psrldq(xmm5, 4);
-
- mov(ebx, ptr[&lod_i->u32[3]]);
- mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
-
- ReadTexel_SSE(xmm4, xmm5, 0);
- // psrldq(xmm5, 4);
-
- punpckldq(xmm1, xmm4);
-
- punpcklqdq(xmm6, xmm1);
+ ReadTexel_SSE(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
}
+ }
+ if(pixels == 4)
+ {
+ movdqa(xmm5, xmm7);
+ movdqa(xmm7, ptr[&m_local.temp.test]);
}
}
else
@@ -3077,41 +2879,12 @@ void GSDrawScanlineCodeGenerator::ReadTexel_SSE(int pixels, int mip_offset)
const int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
- if(m_cpu.has(util::Cpu::tSSE41))
+ for(int i = 0; i < pixels; i++)
{
- for(int i = 0; i < pixels; i++)
- {
- for(uint8 j = 0; j < 4; j++)
- {
- ReadTexel_SSE(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
- }
- }
-
- } else {
- const int t[] = {1, 4, 1, 5, 2, 5, 2, 0};
-
- for(int i = 0; i < pixels; i++)
+ for(uint8 j = 0; j < 4; j++)
{
- const Xmm& addr = Xmm(r[i * 2 + 0]);
- const Xmm& dst = Xmm(r[i * 2 + 1]);
- const Xmm& temp1 = Xmm(t[i * 2 + 0]);
- const Xmm& temp2 = Xmm(t[i * 2 + 1]);
-
- ReadTexel_SSE(dst, addr, 0);
- psrldq(addr, 4); // shuffle instead? (1 2 3 0 ~ rotation)
- ReadTexel_SSE(temp1, addr, 0);
- psrldq(addr, 4);
- punpckldq(dst, temp1);
-
- ReadTexel_SSE(temp1, addr, 0);
- psrldq(addr, 4);
- ReadTexel_SSE(temp2, addr, 0);
- // psrldq(addr, 4);
- punpckldq(temp1, temp2);
-
- punpcklqdq(dst, temp1);
+ ReadTexel_SSE(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
}
-
}
}
}
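
Both zclamp sites now emit the same three instructions: build ZPSM_FMT_MAX as 0xffffffff >> (fmt * 8), then clamp with a signed 32-bit min. A standalone sketch mirroring the emitted pcmpeqd/psrld/pminsd (illustrative only; fmt stands for m_sel.zpsm & 3):

    #include <smmintrin.h>

    static inline __m128i zclamp(__m128i z, int fmt)
    {
        __m128i zmax = _mm_set1_epi32(-1);                      // pcmpeqd t, t
        zmax = _mm_srl_epi32(zmax, _mm_cvtsi32_si128(fmt * 8)); // psrld t, fmt * 8
        return _mm_min_epi32(z, zmax);                          // pminsd
    }
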
diff --git a/plugins/GSdx/Renderers/SW/GSRendererSW.cpp b/plugins/GSdx/Renderers/SW/GSRendererSW.cpp
index fd424400c1055..cb98275428793 100644
--- a/plugins/GSdx/Renderers/SW/GSRendererSW.cpp
+++ b/plugins/GSdx/Renderers/SW/GSRendererSW.cpp
@@ -277,37 +277,17 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex*
GSVector4i off = (GSVector4i)m_context->XYOFFSET;
GSVector4 tsize = GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH, 1, 0);
-
- #if _M_SSE >= 0x401
-
GSVector4i z_max = GSVector4i::xffffffff().srl32(GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt * 8);
- #else
-
- uint32_t z_max = 0xffffffff >> (GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt * 8);
-
- #endif
-
for(int i = (int)m_vertex.next; i > 0; i--, src++, dst++)
{
GSVector4 stcq = GSVector4::load(&src->m[0]); // s t rgba q
- #if _M_SSE >= 0x401
-
GSVector4i xyzuvf(src->m[1]);
GSVector4i xy = xyzuvf.upl16() - off;
GSVector4i zf = xyzuvf.ywww().min_u32(GSVector4i::xffffff00());
- #else
-
- uint32 z = src->XYZ.Z;
-
- GSVector4i xy = GSVector4i::load((int)src->XYZ.u32[0]).upl16() - off;
- GSVector4i zf = GSVector4i((int)std::min(z, 0xffffff00), src->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
-
- #endif
-
dst->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * m_pos_scale;
dst->c = GSVector4(GSVector4i::cast(stcq).zzzz().u8to32() << 7);
@@ -317,15 +297,7 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex*
{
if(fst)
{
- #if _M_SSE >= 0x401
-
t = GSVector4(xyzuvf.uph16() << (16 - 4));
-
- #else
-
- t = GSVector4(GSVector4i::load(src->UV).upl16() << (16 - 4));
-
- #endif
}
else if(q_div)
{
@@ -350,17 +322,8 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex*
if(primclass == GS_SPRITE_CLASS)
{
- #if _M_SSE >= 0x401
-
xyzuvf = xyzuvf.min_u32(z_max);
t = t.insert32<1, 3>(GSVector4::cast(xyzuvf));
-
- #else
-
- z = std::min(z, z_max);
- t = t.insert32<0, 3>(GSVector4::cast(GSVector4i::load(z)));
-
- #endif
}
dst->t = t;
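
The SSE4.1-only position path above unpacks the packed 16-bit X/Y to dwords, subtracts XYOFFSET, and clamps the Z/fog lanes with an unsigned min so the later float round-trip cannot roll over. A standalone sketch with the GSVector4i calls replaced by their raw intrinsics (illustrative only):

    #include <smmintrin.h>

    static inline void convert_pos(__m128i xyzuvf, __m128i off, __m128i* xy, __m128i* zf)
    {
        *xy = _mm_sub_epi32(_mm_unpacklo_epi16(xyzuvf, _mm_setzero_si128()), off); // upl16() - off
        __m128i v = _mm_shuffle_epi32(xyzuvf, _MM_SHUFFLE(3, 3, 3, 1));            // .ywww()
        *zf = _mm_min_epu32(v, _mm_set1_epi32((int)0xffffff00));                   // unsigned clamp, avoids rollover
    }
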
diff --git a/plugins/GSdx/stdafx.h b/plugins/GSdx/stdafx.h
index cfcb833b554db..e2e9fff37f0da 100644
--- a/plugins/GSdx/stdafx.h
+++ b/plugins/GSdx/stdafx.h
@@ -241,60 +241,39 @@ typedef int64 sint64;
#define _M_SSE 0x500
#elif defined(__SSE4_1__)
#define _M_SSE 0x401
-#elif defined(__SSSE3__)
- #define _M_SSE 0x301
-#elif defined(__SSE2__)
- #define _M_SSE 0x200
#endif
#endif
#if !defined(_M_SSE) && (!defined(_WIN32) || defined(_M_AMD64) || defined(_M_IX86_FP) && _M_IX86_FP >= 2)
- #define _M_SSE 0x200
-
-#endif
-
-#if _M_SSE >= 0x200
-
- #include <xmmintrin.h>
- #include <emmintrin.h>
-
- #ifndef _MM_DENORMALS_ARE_ZERO
- #define _MM_DENORMALS_ARE_ZERO 0x0040
- #endif
-
- #define MXCSR (_MM_DENORMALS_ARE_ZERO | _MM_MASK_MASK | _MM_ROUND_NEAREST | _MM_FLUSH_ZERO_ON)
-
- #define _MM_TRANSPOSE4_SI128(row0, row1, row2, row3) \
- { \
- __m128 tmp0 = _mm_shuffle_ps(_mm_castsi128_ps(row0), _mm_castsi128_ps(row1), 0x44); \
- __m128 tmp2 = _mm_shuffle_ps(_mm_castsi128_ps(row0), _mm_castsi128_ps(row1), 0xEE); \
- __m128 tmp1 = _mm_shuffle_ps(_mm_castsi128_ps(row2), _mm_castsi128_ps(row3), 0x44); \
- __m128 tmp3 = _mm_shuffle_ps(_mm_castsi128_ps(row2), _mm_castsi128_ps(row3), 0xEE); \
- (row0) = _mm_castps_si128(_mm_shuffle_ps(tmp0, tmp1, 0x88)); \
- (row1) = _mm_castps_si128(_mm_shuffle_ps(tmp0, tmp1, 0xDD)); \
- (row2) = _mm_castps_si128(_mm_shuffle_ps(tmp2, tmp3, 0x88)); \
- (row3) = _mm_castps_si128(_mm_shuffle_ps(tmp2, tmp3, 0xDD)); \
- }
-
-#else
-
-#error TODO: GSVector4 and GSRasterizer needs SSE2
+ #define _M_SSE 0x401
#endif
-#if _M_SSE >= 0x301
-
- #include <tmmintrin.h>
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#ifndef _MM_DENORMALS_ARE_ZERO
+#define _MM_DENORMALS_ARE_ZERO 0x0040
#endif
-#if _M_SSE >= 0x401
-
- #include <smmintrin.h>
-
-#endif
+#define MXCSR (_MM_DENORMALS_ARE_ZERO | _MM_MASK_MASK | _MM_ROUND_NEAREST | _MM_FLUSH_ZERO_ON)
+
+#define _MM_TRANSPOSE4_SI128(row0, row1, row2, row3) \
+{ \
+ __m128 tmp0 = _mm_shuffle_ps(_mm_castsi128_ps(row0), _mm_castsi128_ps(row1), 0x44); \
+ __m128 tmp2 = _mm_shuffle_ps(_mm_castsi128_ps(row0), _mm_castsi128_ps(row1), 0xEE); \
+ __m128 tmp1 = _mm_shuffle_ps(_mm_castsi128_ps(row2), _mm_castsi128_ps(row3), 0x44); \
+ __m128 tmp3 = _mm_shuffle_ps(_mm_castsi128_ps(row2), _mm_castsi128_ps(row3), 0xEE); \
+ (row0) = _mm_castps_si128(_mm_shuffle_ps(tmp0, tmp1, 0x88)); \
+ (row1) = _mm_castps_si128(_mm_shuffle_ps(tmp0, tmp1, 0xDD)); \
+ (row2) = _mm_castps_si128(_mm_shuffle_ps(tmp2, tmp3, 0x88)); \
+ (row3) = _mm_castps_si128(_mm_shuffle_ps(tmp2, tmp3, 0xDD)); \
+}
+
+#include <tmmintrin.h>
+#include <smmintrin.h>
#if _M_SSE >= 0x500
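
With the lower tiers gone, the only _M_SSE values left to distinguish are 0x401 (the new baseline) and 0x500/0x501 for AVX/AVX2, so GCC/Clang builds must pass -msse4.1 or newer. A compile-time guard in the same spirit (illustrative only, not part of the patch):

    #if !defined(_MSC_VER) && !defined(__SSE4_1__)
        #error GSdx now requires SSE4.1; build with -msse4.1, -mavx or -mavx2
    #endif
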