diff --git a/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp b/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp index acd4903fe4435..fcae67fac797c 100644 --- a/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp +++ b/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp @@ -48,6 +48,16 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(SettingsWindow* dialog, QWidget* connect(m_ui.vu0ClampMode, QOverload::of(&QComboBox::currentIndexChanged), [this](int index) { setClampingMode(0, index); }); connect(m_ui.vu1ClampMode, QOverload::of(&QComboBox::currentIndexChanged), [this](int index) { setClampingMode(1, index); }); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftAddSub, "EmuCore/CPU/Recompiler", "fpuSoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftMulDiv, "EmuCore/CPU/Recompiler", "fpuSoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftSqrt, "EmuCore/CPU/Recompiler", "fpuSoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftAddSub, "EmuCore/CPU/Recompiler", "vu0SoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftMulDiv, "EmuCore/CPU/Recompiler", "vu0SoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftSqrt, "EmuCore/CPU/Recompiler", "vu0SoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftAddSub, "EmuCore/CPU/Recompiler", "vu1SoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftMulDiv, "EmuCore/CPU/Recompiler", "vu1SoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftSqrt, "EmuCore/CPU/Recompiler", "vu1SoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.iopRecompiler, "EmuCore/CPU/Recompiler", "EnableIOP", true); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.gameFixes, "EmuCore", "EnableGameFixes", true); diff --git a/pcsx2-qt/Settings/AdvancedSettingsWidget.ui b/pcsx2-qt/Settings/AdvancedSettingsWidget.ui index 197fda8c7c6c3..aa68c2c5c380e 100644 --- a/pcsx2-qt/Settings/AdvancedSettingsWidget.ui +++ b/pcsx2-qt/Settings/AdvancedSettingsWidget.ui @@ -34,7 +34,7 @@ 0 -447 790 - 1049 + 1283 @@ -94,10 +94,10 @@ - - + + - Division Rounding Mode: + Clamping Mode: @@ -125,38 +125,7 @@ - - - - Clamping Mode: - - - - - - - - None - - - - - Normal (Default) - - - - - Extra + Preserve Sign - - - - - Full - - - - - + @@ -208,6 +177,67 @@ + + + + + + None + + + + + Normal (Default) + + + + + Extra + Preserve Sign + + + + + Full + + + + + + + + Division Rounding Mode: + + + + + + + Software Float + + + + + + Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Square Root + + + + + @@ -218,7 +248,7 @@ Vector Units (VU) - + VU1 Rounding Mode: @@ -249,7 +279,129 @@ - + + + + VU1 Clamping Mode: + + + + + + + VU0 Rounding Mode: + + + + + + + VU1 Software Float + + + + + + Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Float Square Root + + + + + + + + + + VU0 Software Float + + + + + + Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Square Root + + + + + + + + + + + Nearest + + + + + Negative + + + + + Positive + + + + + Chop/Zero (Default) + + + + + + + + + None + + + + + Normal (Default) + + + + + Extra + + + + + Extra + Preserve Sign + + + + + @@ -281,30 +433,6 @@ - - - - - None - - - - - Normal (Default) - - - - - Extra - - - - - Extra + Preserve Sign - - - - @@ -312,45 +440,7 @@ - - - - VU0 Rounding Mode: - - - - - - - VU1 Clamping Mode: - - - - - - - - Nearest - - - - - Negative - - - - - Positive - - - - - Chop/Zero (Default) - - - - - + diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index 5430675614204..4e0a82db9e977 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -93,6 +93,7 @@ set(pcsx2Sources MTGS.cpp MTVU.cpp Patch.cpp + PS2Float.cpp Pcsx2Config.cpp PerformanceMetrics.cpp PrecompiledHeader.cpp @@ -173,6 +174,7 @@ set(pcsx2Headers MTVU.h Memory.h MemoryTypes.h + PS2Float.h Patch.h PerformanceMetrics.h PrecompiledHeader.h diff --git a/pcsx2/Config.h b/pcsx2/Config.h index fc64f1c22bae3..9671b86fbd9e2 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -596,17 +596,32 @@ struct Pcsx2Config vu0ExtraOverflow : 1, vu0SignOverflow : 1, vu0Underflow : 1; + + bool + vu0SoftAddSub : 1, + vu0SoftMulDiv : 1, + vu0SoftSqrt : 1; bool vu1Overflow : 1, vu1ExtraOverflow : 1, vu1SignOverflow : 1, vu1Underflow : 1; + + bool + vu1SoftAddSub : 1, + vu1SoftMulDiv : 1, + vu1SoftSqrt : 1; bool fpuOverflow : 1, fpuExtraOverflow : 1, fpuFullMode : 1; + + bool + fpuSoftAddSub : 1, + fpuSoftMulDiv : 1, + fpuSoftSqrt : 1; bool EnableEECache : 1; @@ -1426,11 +1441,19 @@ namespace EmuFolders #define CHECK_VU_SIGN_OVERFLOW(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SignOverflow : EmuConfig.Cpu.Recompiler.vu1SignOverflow) #define CHECK_VU_UNDERFLOW(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0Underflow : EmuConfig.Cpu.Recompiler.vu1Underflow) +#define CHECK_VU_SOFT_ADDSUB(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftAddSub : EmuConfig.Cpu.Recompiler.vu1SoftAddSub) +#define CHECK_VU_SOFT_MULDIV(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftMulDiv : EmuConfig.Cpu.Recompiler.vu1SoftMulDiv) +#define CHECK_VU_SOFT_SQRT(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftSqrt : EmuConfig.Cpu.Recompiler.vu1SoftSqrt) + #define CHECK_FPU_OVERFLOW (EmuConfig.Cpu.Recompiler.fpuOverflow) #define CHECK_FPU_EXTRA_OVERFLOW (EmuConfig.Cpu.Recompiler.fpuExtraOverflow) // If enabled, Operands are checked for infinities before being used in the FPU recs #define CHECK_FPU_EXTRA_FLAGS 1 // Always enabled now // Sets D/I flags on FPU instructions #define CHECK_FPU_FULL (EmuConfig.Cpu.Recompiler.fpuFullMode) +#define CHECK_FPU_SOFT_ADDSUB (EmuConfig.Cpu.Recompiler.fpuSoftAddSub) +#define CHECK_FPU_SOFT_MULDIV (EmuConfig.Cpu.Recompiler.fpuSoftMulDiv) +#define CHECK_FPU_SOFT_SQRT (EmuConfig.Cpu.Recompiler.fpuSoftSqrt) + //------------ EE Recompiler defines - Comment to disable a recompiler --------------- #define SHIFT_RECOMPILE // Speed majorly reduced if disabled diff --git a/pcsx2/FPU.cpp b/pcsx2/FPU.cpp index 3ac1ae3fd178c..d4967dc4675c0 100644 --- a/pcsx2/FPU.cpp +++ b/pcsx2/FPU.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: GPL-3.0+ #include "Common.h" - +#include "PS2Float.h" #include // Helper Macros @@ -63,28 +63,57 @@ // If we have an infinity value, then Overflow has occured. bool checkOverflow(u32& xReg, u32 cFlagsToSet) { - if ((xReg & ~0x80000000) == PosInfinity) { - /*Console.Warning( "FPU OVERFLOW!: Changing to +/-Fmax!!!!!!!!!!!!\n" );*/ - xReg = (xReg & 0x80000000) | posFmax; - _ContVal_ |= (cFlagsToSet); - return true; + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + if (xReg == PS2Float::MAX_FLOATING_POINT_VALUE || xReg == PS2Float::MIN_FLOATING_POINT_VALUE) + { + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & FPUflagO) + _ContVal_ &= ~FPUflagO; + } + else + { + if ((xReg & ~0x80000000) == PosInfinity) + { + /*Console.Warning( "FPU OVERFLOW!: Changing to +/-Fmax!!!!!!!!!!!!\n" );*/ + xReg = (xReg & 0x80000000) | posFmax; + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & FPUflagO) + _ContVal_ &= ~FPUflagO; } - else if (cFlagsToSet & FPUflagO) - _ContVal_ &= ~FPUflagO; return false; } // If we have a denormal value, then Underflow has occured. bool checkUnderflow(u32& xReg, u32 cFlagsToSet) { - if ( ( (xReg & 0x7F800000) == 0 ) && ( (xReg & 0x007FFFFF) != 0 ) ) { - /*Console.Warning( "FPU UNDERFLOW!: Changing to +/-0!!!!!!!!!!!!\n" );*/ - xReg &= 0x80000000; - _ContVal_ |= (cFlagsToSet); - return true; + + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + if (PS2Float(xReg).IsDenormalized()) + { + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & FPUflagU) + _ContVal_ &= ~FPUflagU; + } + else + { + if (((xReg & 0x7F800000) == 0) && ((xReg & 0x007FFFFF) != 0)) + { + /*Console.Warning( "FPU UNDERFLOW!: Changing to +/-0!!!!!!!!!!!!\n" );*/ + xReg &= 0x80000000; + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & FPUflagU) + _ContVal_ &= ~FPUflagU; } - else if (cFlagsToSet & FPUflagU) - _ContVal_ &= ~FPUflagU; return false; } @@ -106,9 +135,36 @@ __fi u32 fp_min(u32 a, u32 b) */ bool checkDivideByZero(u32& xReg, u32 yDivisorReg, u32 zDividendReg, u32 cFlagsToSet1, u32 cFlagsToSet2) { - if ( (yDivisorReg & 0x7F800000) == 0 ) { - _ContVal_ |= ( (zDividendReg & 0x7F800000) == 0 ) ? cFlagsToSet2 : cFlagsToSet1; - xReg = ( (yDivisorReg ^ zDividendReg) & 0x80000000 ) | posFmax; + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + PS2Float yMatrix = PS2Float(yDivisorReg); + PS2Float zMatrix = PS2Float(zDividendReg); + + if (yMatrix.IsZero()) + { + bool dividendZero = zMatrix.IsZero(); + + _ContVal_ |= dividendZero ? cFlagsToSet2 : cFlagsToSet1; + + bool IsSigned = yMatrix.Sign ^ zMatrix.Sign; + + if (dividendZero) + xReg = IsSigned ? PS2Float::MIN_FLOATING_POINT_VALUE : PS2Float::MAX_FLOATING_POINT_VALUE; + else + { + PS2Float zeroRes = PS2Float(0); + + zeroRes.Sign = IsSigned; + xReg = zeroRes.AsUInt32(); + } + + return true; + } + } + else if ((yDivisorReg & 0x7F800000) == 0) + { + _ContVal_ |= ((zDividendReg & 0x7F800000) == 0) ? cFlagsToSet2 : cFlagsToSet1; + xReg = ((yDivisorReg ^ zDividendReg) & 0x80000000) | posFmax; return true; } @@ -125,25 +181,6 @@ bool checkDivideByZero(u32& xReg, u32 yDivisorReg, u32 zDividendReg, u32 cFlagsT _ContVal_ &= ~( cFlags ) ; \ } -#ifdef comparePrecision -// This compare discards the least-significant bit(s) in order to solve some rounding issues. - #define C_cond_S(cond) { \ - FPRreg tempA, tempB; \ - tempA.UL = _FsValUl_ & comparePrecision; \ - tempB.UL = _FtValUl_ & comparePrecision; \ - _ContVal_ = ( ( tempA.f ) cond ( tempB.f ) ) ? \ - ( _ContVal_ | FPUflagC ) : \ - ( _ContVal_ & ~FPUflagC ); \ - } -#else -// Used for Comparing; This compares if the floats are exactly the same. - #define C_cond_S(cond) { \ - _ContVal_ = ( fpuDouble(_FsValUl_) cond fpuDouble(_FtValUl_) ) ? \ - ( _ContVal_ | FPUflagC ) : \ - ( _ContVal_ & ~FPUflagC ); \ - } -#endif - // Conditional Branch #define BC1(cond) \ if ( ( _ContVal_ & FPUflagC ) cond 0 ) { \ @@ -182,19 +219,94 @@ float fpuDouble(u32 f) } } +static __fi u32 fpuAccurateAdd(u32 a, u32 b) +{ + if (CHECK_FPU_SOFT_ADDSUB) return PS2Float(a).Add(PS2Float(b)).AsUInt32(); + + return std::bit_cast(fpuDouble(a) + fpuDouble(b)); +} + +static __fi u32 fpuAccurateSub(u32 a, u32 b) +{ + if (CHECK_FPU_SOFT_ADDSUB) return PS2Float(a).Sub(PS2Float(b)).AsUInt32(); + + return std::bit_cast(fpuDouble(a) - fpuDouble(b)); +} + +static __fi u32 fpuAccurateMul(u32 a, u32 b) +{ + if (CHECK_FPU_SOFT_MULDIV) return PS2Float(a).Mul(PS2Float(b)).AsUInt32(); + + return std::bit_cast(fpuDouble(a) * fpuDouble(b)); +} + +static __fi u32 fpuAccurateDiv(u32 a, u32 b) +{ + if (CHECK_FPU_SOFT_MULDIV) return PS2Float(a).Div(PS2Float(b)).AsUInt32(); + + return std::bit_cast(fpuDouble(a) / fpuDouble(b)); +} + +static __fi s32 double_to_int(double value) +{ + if (value >= 2147483647.0) + return 2147483647LL; + if (value <= -2147483648.0) + return -2147483648LL; + return value; +} + +static __fi void C_cond_S(uint8_t mode) +{ + switch (mode) + { + case 0: // == + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + _ContVal_ = (PS2Float(_FsValUl_).CompareTo(PS2Float(_FtValUl_)) == 0) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + else + { + _ContVal_ = (fpuDouble(_FsValUl_) == fpuDouble(_FtValUl_)) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + break; + case 1: // <= + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + int32_t cmpResult = PS2Float(_FsValUl_).CompareTo(PS2Float(_FtValUl_)); + _ContVal_ = (cmpResult == 0 || cmpResult == -1) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + else + { + _ContVal_ = (fpuDouble(_FsValUl_) <= fpuDouble(_FtValUl_)) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + break; + case 2: // < + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + _ContVal_ = (PS2Float(_FsValUl_).CompareTo(PS2Float(_FtValUl_)) == -1) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + else + { + _ContVal_ = (fpuDouble(_FsValUl_) < fpuDouble(_FtValUl_)) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + break; + } +} + void ABS_S() { _FdValUl_ = _FsValUl_ & 0x7fffffff; clearFPUFlags( FPUflagO | FPUflagU ); } void ADD_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); + _FdValUl_ = fpuAccurateAdd(_FsValUl_, _FtValUl_); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void ADDA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); + _FAValUl_ = fpuAccurateAdd(_FsValUl_, _FtValUl_); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -216,7 +328,7 @@ void BC1TL() { } void C_EQ() { - C_cond_S(==); + C_cond_S(0); } void C_F() { @@ -224,11 +336,11 @@ void C_F() { } void C_LE() { - C_cond_S(<=); + C_cond_S(1); } void C_LT() { - C_cond_S(<); + C_cond_S(2); } void CFC1() { @@ -253,14 +365,30 @@ void CVT_S() { } void CVT_W() { - if ( ( _FsValUl_ & 0x7F800000 ) <= 0x4E800000 ) { _FdValSl_ = (s32)_FsValf_; } - else if ( ( _FsValUl_ & 0x80000000 ) == 0 ) { _FdValUl_ = 0x7fffffff; } - else { _FdValUl_ = 0x80000000; } + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + _FdValSl_ = double_to_int(PS2Float(_FsValUl_).ToDouble()); + } + else + { + if ((_FsValUl_ & 0x7F800000) <= 0x4E800000) + { + _FdValSl_ = (s32)_FsValf_; + } + else if ((_FsValUl_ & 0x80000000) == 0) + { + _FdValUl_ = 0x7fffffff; + } + else + { + _FdValUl_ = 0x80000000; + } + } } void DIV_S() { if (checkDivideByZero( _FdValUl_, _FtValUl_, _FsValUl_, FPUflagD | FPUflagSD, FPUflagI | FPUflagSI)) return; - _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( _FtValUl_ ); + _FdValUl_ = fpuAccurateDiv(_FsValUl_, _FtValUl_); if (checkOverflow( _FdValUl_, 0)) return; checkUnderflow( _FdValUl_, 0); } @@ -270,15 +398,13 @@ void DIV_S() { method provides a similar outcome and is faster. (cottonvibes) */ void MADD_S() { - FPRreg temp; - temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - _FdValf_ = fpuDouble( _FAValUl_ ) + fpuDouble( temp.UL ); + _FdValUl_ = fpuAccurateAdd(_FAValUl_, fpuAccurateMul(_FsValUl_, _FtValUl_)); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void MADDA_S() { - _FAValf_ += fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FAValUl_ = fpuAccurateAdd(_FAValUl_, fpuAccurateMul(_FsValUl_, _FtValUl_)); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -303,15 +429,13 @@ void MOV_S() { } void MSUB_S() { - FPRreg temp; - temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - _FdValf_ = fpuDouble( _FAValUl_ ) - fpuDouble( temp.UL ); + _FdValUl_ = fpuAccurateSub(_FAValUl_, fpuAccurateMul(_FsValUl_, _FtValUl_)); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void MSUBA_S() { - _FAValf_ -= fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FAValUl_ = fpuAccurateSub(_FAValUl_, fpuAccurateMul(_FsValUl_, _FtValUl_)); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -321,13 +445,13 @@ void MTC1() { } void MUL_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FdValUl_ = fpuAccurateMul(_FsValUl_, _FtValUl_); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void MULA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FAValUl_ = fpuAccurateMul(_FsValUl_, _FtValUl_); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -341,17 +465,45 @@ void RSQRT_S() { FPRreg temp; clearFPUFlags(FPUflagD | FPUflagI); - if ( ( _FtValUl_ & 0x7F800000 ) == 0 ) { // Ft is zero (Denormals are Zero) - _ContVal_ |= FPUflagD | FPUflagSD; - _FdValUl_ = ( _FtValUl_ & 0x80000000 ) | posFmax; - return; + if (CHECK_FPU_SOFT_SQRT) + { + PS2Float value = PS2Float(_FtValUl_); + + if (value.IsDenormalized()) + { + _ContVal_ |= FPUflagD | FPUflagSD; + _FdValUl_ = value.Sign ? PS2Float::MIN_FLOATING_POINT_VALUE : PS2Float::MAX_FLOATING_POINT_VALUE; + return; + } + else if (_FtValUl_ & 0x80000000) // Ft is negative + { + _ContVal_ |= FPUflagI | FPUflagSI; + _FdValUl_ = PS2Float(_FsValUl_).Rsqrt(PS2Float(value.Abs())).AsUInt32(); + } + else // Ft is positive and not zero + { + _FdValUl_ = PS2Float(_FsValUl_).Rsqrt(value).AsUInt32(); + } } - else if ( _FtValUl_ & 0x80000000 ) { // Ft is negative - _ContVal_ |= FPUflagI | FPUflagSI; - temp.f = sqrt( fabs( fpuDouble( _FtValUl_ ) ) ); - _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( temp.UL ); + else + { + if ((_FtValUl_ & 0x7F800000) == 0) // Ft is zero (Denormals are Zero) + { + _ContVal_ |= FPUflagD | FPUflagSD; + _FdValUl_ = (_FtValUl_ & 0x80000000) | posFmax; + return; + } + else if (_FtValUl_ & 0x80000000) // Ft is negative + { + _ContVal_ |= FPUflagI | FPUflagSI; + temp.f = sqrt(fabs(fpuDouble(_FtValUl_))); + _FdValf_ = fpuDouble(_FsValUl_) / fpuDouble(temp.UL); + } + else // Ft is positive and not zero + { + _FdValf_ = fpuDouble(_FsValUl_) / sqrt(fpuDouble(_FtValUl_)); + } } - else { _FdValf_ = fpuDouble( _FsValUl_ ) / sqrt( fpuDouble( _FtValUl_ ) ); } // Ft is positive and not zero if (checkOverflow( _FdValUl_, 0)) return; checkUnderflow( _FdValUl_, 0); @@ -360,23 +512,40 @@ void RSQRT_S() { void SQRT_S() { clearFPUFlags(FPUflagI | FPUflagD); - if ( ( _FtValUl_ & 0x7F800000 ) == 0 ) // If Ft = +/-0 - _FdValUl_ = _FtValUl_ & 0x80000000;// result is 0 - else if ( _FtValUl_ & 0x80000000 ) { // If Ft is Negative - _ContVal_ |= FPUflagI | FPUflagSI; - _FdValf_ = sqrt( fabs( fpuDouble( _FtValUl_ ) ) ); - } else - _FdValf_ = sqrt( fpuDouble( _FtValUl_ ) ); // If Ft is Positive + if (CHECK_FPU_SOFT_SQRT) + { + PS2Float value = PS2Float(_FtValUl_); + + if (_FtValUl_ & 0x80000000) // If Ft is Negative + { + _ContVal_ |= FPUflagI | FPUflagSI; + _FdValUl_ = PS2Float(value.Abs()).Sqrt().AsUInt32(); + } + else + _FdValUl_ = value.Sqrt().AsUInt32(); // If Ft is Positive + } + else + { + if ((_FtValUl_ & 0x7F800000) == 0) // If Ft = +/-0 + _FdValUl_ = _FtValUl_ & 0x80000000; // result is 0 + else if (_FtValUl_ & 0x80000000) // If Ft is Negative + { + _ContVal_ |= FPUflagI | FPUflagSI; + _FdValf_ = sqrt(fabs(fpuDouble(_FtValUl_))); + } + else + _FdValf_ = sqrt(fpuDouble(_FtValUl_)); // If Ft is Positive + } } void SUB_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); + _FdValUl_ = fpuAccurateSub(_FsValUl_, _FtValUl_); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void SUBA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); + _FAValUl_ = fpuAccurateSub(_FsValUl_, _FtValUl_); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } diff --git a/pcsx2/PS2Float.cpp b/pcsx2/PS2Float.cpp new file mode 100644 index 0000000000000..9a9babeabfad7 --- /dev/null +++ b/pcsx2/PS2Float.cpp @@ -0,0 +1,821 @@ +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#include +#include +#include +#include +#include +#include +#include +#include "PS2Float.h" +#include "Common.h" + +//**************************************************************** +// Booth Multiplier +//**************************************************************** + +PS2Float::BoothRecode PS2Float::Booth(u32 a, u32 b, u32 bit) +{ + u32 test = (bit ? b >> (bit * 2 - 1) : b << 1) & 7; + a <<= (bit * 2); + a += (test == 3 || test == 4) ? a : 0; + u32 neg = (test >= 4 && test <= 6) ? ~0u : 0; + u32 pos = 1 << (bit * 2); + a ^= (neg & -pos); + a &= (test >= 1 && test <= 6) ? ~0u : 0; + return {a, neg & pos}; +} + +PS2Float::AddResult PS2Float::Add3(u32 a, u32 b, u32 c) +{ + u32 u = a ^ b; + return {u ^ c, ((u & c) | (a & b)) << 1}; +} + +u64 PS2Float::MulMantissa(u32 a, u32 b) +{ + u64 full = static_cast(a) * static_cast(b); + BoothRecode b0 = Booth(a, b, 0); + BoothRecode b1 = Booth(a, b, 1); + BoothRecode b2 = Booth(a, b, 2); + BoothRecode b3 = Booth(a, b, 3); + BoothRecode b4 = Booth(a, b, 4); + BoothRecode b5 = Booth(a, b, 5); + BoothRecode b6 = Booth(a, b, 6); + BoothRecode b7 = Booth(a, b, 7); + + // First cycle + AddResult t0 = Add3(b1.data, b2.data, b3.data); + AddResult t1 = Add3(b4.data & ~0x7ffu, b5.data & ~0xfffu, b6.data); + // A few adds get skipped, squeeze them back in + t1.hi |= b6.negate | (b5.data & 0x800); + b7.data |= (b5.data & 0x400) + b5.negate; + + // Second cycle + AddResult t2 = Add3(b0.data, t0.lo, t0.hi); + AddResult t3 = Add3(b7.data, t1.lo, t1.hi); + + // Third cycle + AddResult t4 = Add3(t2.hi, t3.lo, t3.hi); + + // Fourth cycle + AddResult t5 = Add3(t2.lo, t4.lo, t4.hi); + + // Discard bits and sum + t5.hi += b7.negate; + t5.lo &= ~0x7fffu; + t5.hi &= ~0x7fffu; + u32 ps2lo = t5.lo + t5.hi; + return full - ((ps2lo ^ full) & 0x8000); +} + +//**************************************************************** +// Float Processor +//**************************************************************** + +PS2Float::PS2Float(u32 value) + : Sign((value >> 31) & 1) + , Exponent((u8)(((value >> 23) & 0xFF))) + , Mantissa(value & 0x7FFFFF) +{ +} + +PS2Float::PS2Float(bool sign, u8 exponent, u32 mantissa) + : Sign(sign) + , Exponent(exponent) + , Mantissa(mantissa) +{ +} + +PS2Float PS2Float::Max() +{ + return PS2Float(MAX_FLOATING_POINT_VALUE); +} + +PS2Float PS2Float::Min() +{ + return PS2Float(MIN_FLOATING_POINT_VALUE); +} + +PS2Float PS2Float::One() +{ + return PS2Float(ONE); +} + +PS2Float PS2Float::MinOne() +{ + return PS2Float(MIN_ONE); +} + +u32 PS2Float::AsUInt32() const +{ + u32 result = 0; + result |= (Sign ? 1u : 0u) << 31; + result |= (u32)(Exponent << 23); + result |= Mantissa; + return result; +} + +PS2Float PS2Float::Add(PS2Float addend) +{ + if (IsDenormalized() || addend.IsDenormalized()) + return SolveAddSubDenormalizedOperation(*this, addend, true); + + if (IsAbnormal() && addend.IsAbnormal()) + return SolveAbnormalAdditionOrSubtractionOperation(*this, addend, true); + + u32 a = AsUInt32(); + u32 b = addend.AsUInt32(); + + //exponent difference + s32 exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF); + + //diff = 25 .. 255 , expt < expd + if (exp_diff >= 25) + { + b = b & SIGNMASK; + } + + //diff = 1 .. 24, expt < expd + else if (exp_diff > 0) + { + exp_diff = exp_diff - 1; + b = (MIN_FLOATING_POINT_VALUE << exp_diff) & b; + } + + //diff = -255 .. -25, expd < expt + else if (exp_diff <= -25) + { + a = a & SIGNMASK; + } + + //diff = -24 .. -1 , expd < expt + else if (exp_diff < 0) + { + exp_diff = -exp_diff; + exp_diff = exp_diff - 1; + a = a & (MIN_FLOATING_POINT_VALUE << exp_diff); + } + + return PS2Float(a).DoAdd(PS2Float(b)); +} + +PS2Float PS2Float::Sub(PS2Float subtrahend) +{ + if (IsDenormalized() || subtrahend.IsDenormalized()) + return SolveAddSubDenormalizedOperation(*this, subtrahend, false); + + if (IsAbnormal() && subtrahend.IsAbnormal()) + return SolveAbnormalAdditionOrSubtractionOperation(*this, subtrahend, false); + + u32 a = AsUInt32(); + u32 b = subtrahend.AsUInt32(); + + //exponent difference + s32 exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF); + + //diff = 25 .. 255 , expt < expd + if (exp_diff >= 25) + { + b = b & SIGNMASK; + } + + //diff = 1 .. 24, expt < expd + else if (exp_diff > 0) + { + exp_diff = exp_diff - 1; + b = (MIN_FLOATING_POINT_VALUE << exp_diff) & b; + } + + //diff = -255 .. -25, expd < expt + else if (exp_diff <= -25) + { + a = a & SIGNMASK; + } + + //diff = -24 .. -1 , expd < expt + else if (exp_diff < 0) + { + exp_diff = -exp_diff; + exp_diff = exp_diff - 1; + a = a & (MIN_FLOATING_POINT_VALUE << exp_diff); + } + + + return PS2Float(a).DoAdd(Neg(PS2Float(b))); +} + +PS2Float PS2Float::Mul(PS2Float mulend) +{ + if (IsDenormalized() || mulend.IsDenormalized()) + return SolveMultiplicationDenormalizedOperation(*this, mulend); + + if (IsAbnormal() && mulend.IsAbnormal()) + return SolveAbnormalMultiplicationOrDivisionOperation(*this, mulend, true); + + if (IsZero() || mulend.IsZero()) + { + PS2Float result = PS2Float(0); + + result.Sign = DetermineMultiplicationDivisionOperationSign(*this, mulend); + return result; + } + + return DoMul(mulend); +} + +PS2Float PS2Float::Div(PS2Float divend) +{ + if (IsDenormalized() || divend.IsDenormalized()) + return SolveDivisionDenormalizedOperation(*this, divend); + + if (IsAbnormal() && divend.IsAbnormal()) + return SolveAbnormalMultiplicationOrDivisionOperation(*this, divend, false); + + if (IsZero()) + { + PS2Float result = PS2Float(0); + + result.Sign = DetermineMultiplicationDivisionOperationSign(*this, divend); + return result; + } + else if (divend.IsZero()) + return DetermineMultiplicationDivisionOperationSign(*this, divend) ? Min() : Max(); + + return DoDiv(divend); +} + +// Rounding can be slightly off: (PS2: rsqrt(0x7FFFFFF0) -> 0x5FB504ED | SoftFloat/IEEE754 rsqrt(0x7FFFFFF0) -> 0x5FB504EE). +PS2Float PS2Float::Sqrt() +{ + s32 t; + s32 s = 0; + s32 q = 0; + u32 r = 0x01000000; /* r = moving bit from right to left */ + + if (IsDenormalized()) + return PS2Float(0); + + // PS2 only takes positive numbers for SQRT, and convert if necessary. + s32 ix = (s32)(PS2Float(false, Exponent, Mantissa).AsUInt32()); + + /* extract mantissa and unbias exponent */ + s32 m = (ix >> 23) - BIAS; + + ix = (ix & 0x007FFFFF) | 0x00800000; + if ((m & 1) == 1) + { + /* odd m, double x to make it even */ + ix += ix; + } + + m >>= 1; /* m = [m/2] */ + + /* generate sqrt(x) bit by bit */ + ix += ix; + + while (r != 0) + { + t = s + (s32)(r); + if (t <= ix) + { + s = t + (s32)(r); + ix -= t; + q += (s32)(r); + } + + ix += ix; + r >>= 1; + } + + /* use floating add to find out rounding direction */ + if (ix != 0) + { + q += q & 1; + } + + ix = (q >> 1) + 0x3F000000; + ix += m << 23; + + return PS2Float((u32)(ix)); +} + +PS2Float PS2Float::Rsqrt(PS2Float other) +{ + return Div(other.Sqrt()); +} + +bool PS2Float::IsDenormalized() +{ + return Exponent == 0; +} + +bool PS2Float::IsAbnormal() +{ + u32 val = AsUInt32(); + return val == MAX_FLOATING_POINT_VALUE || val == MIN_FLOATING_POINT_VALUE || + val == POSITIVE_INFINITY_VALUE || val == NEGATIVE_INFINITY_VALUE; +} + +bool PS2Float::IsZero() +{ + return (Abs()) == 0; +} + +u32 PS2Float::Abs() +{ + return (AsUInt32() & MAX_FLOATING_POINT_VALUE); +} + +PS2Float PS2Float::RoundTowardsZero() +{ + return PS2Float((u32)(std::trunc((double)(AsUInt32())))); +} + +s32 PS2Float::CompareTo(PS2Float other) +{ + s32 selfTwoComplementVal = (s32)(Abs()); + if (Sign) + selfTwoComplementVal = -selfTwoComplementVal; + + s32 otherTwoComplementVal = (s32)(other.Abs()); + if (other.Sign) + otherTwoComplementVal = -otherTwoComplementVal; + + if (selfTwoComplementVal < otherTwoComplementVal) + return -1; + else if (selfTwoComplementVal == otherTwoComplementVal) + return 0; + else + return 1; +} + +s32 PS2Float::CompareOperand(PS2Float other) +{ + s32 selfTwoComplementVal = (s32)(Abs()); + s32 otherTwoComplementVal = (s32)(other.Abs()); + + if (selfTwoComplementVal < otherTwoComplementVal) + return -1; + else if (selfTwoComplementVal == otherTwoComplementVal) + return 0; + else + return 1; +} + +double PS2Float::ToDouble() +{ + return std::bit_cast(((u64)Sign << 63) | ((((u64)Exponent - BIAS) + 1023ULL) << 52) | ((u64)Mantissa << 29)); +} + +std::string PS2Float::ToString() +{ + double res = ToDouble(); + + u32 value = AsUInt32(); + std::ostringstream oss; + oss << std::fixed << std::setprecision(6); + + if (IsDenormalized()) + { + oss << "Denormalized(" << res << ")"; + } + else if (value == MAX_FLOATING_POINT_VALUE) + { + oss << "Fmax(" << res << ")"; + } + else if (value == MIN_FLOATING_POINT_VALUE) + { + oss << "-Fmax(" << res << ")"; + } + else if (value == POSITIVE_INFINITY_VALUE) + { + oss << "Inf(" << res << ")"; + } + else if (value == NEGATIVE_INFINITY_VALUE) + { + oss << "-Inf(" << res << ")"; + } + else + { + oss << "Ps2Float(" << res << ")"; + } + + return oss.str(); +} + +PS2Float PS2Float::DoAdd(PS2Float other) +{ + const u8 roundingMultiplier = 6; + + u8 selfExponent = Exponent; + s32 resExponent = selfExponent - other.Exponent; + + if (resExponent < 0) + return other.DoAdd(*this); + else if (resExponent >= 25) + return *this; + + // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate + u32 sign1 = (u32)((s32)AsUInt32() >> 31); + s32 selfMantissa = (s32)(((Mantissa | 0x800000) ^ sign1) - sign1); + u32 sign2 = (u32)((s32)other.AsUInt32() >> 31); + s32 otherMantissa = (s32)(((other.Mantissa | 0x800000) ^ sign2) - sign2); + + // PS2 multiply by 2 before doing the Math here. + s32 man = (selfMantissa << roundingMultiplier) + ((otherMantissa << roundingMultiplier) >> resExponent); + s32 absMan = abs(man); + if (absMan == 0) + return PS2Float(0); + + // Remove from exponent the PS2 Multiplier value. + s32 rawExp = selfExponent - roundingMultiplier; + + s32 amount = normalizeAmounts[clz(absMan)]; + rawExp -= amount; + absMan <<= amount; + + s32 msbIndex = BitScanReverse8(absMan >> 23); + rawExp += msbIndex; + absMan >>= msbIndex; + + if (rawExp > 255) + return man < 0 ? Min() : Max(); + else if (rawExp <= 0) + return PS2Float(man < 0, 0, 0); + + return PS2Float(((u32)man & SIGNMASK) | (u32)rawExp << 23 | ((u32)absMan & 0x7FFFFF)); +} + +PS2Float PS2Float::DoMul(PS2Float other) +{ + u8 selfExponent = Exponent; + u8 otherExponent = other.Exponent; + u32 selfMantissa = Mantissa | 0x800000; + u32 otherMantissa = other.Mantissa | 0x800000; + u32 sign = (AsUInt32() ^ other.AsUInt32()) & SIGNMASK; + + s32 resExponent = selfExponent + otherExponent - 127; + u32 resMantissa = (u32)(MulMantissa(selfMantissa, otherMantissa) >> 23); + + if (resMantissa > 0xFFFFFF) + { + resMantissa >>= 1; + resExponent++; + } + + if (resExponent > 255) + return PS2Float(sign | MAX_FLOATING_POINT_VALUE); + else if (resExponent <= 0) + return PS2Float(sign); + + return PS2Float(sign | (u32)(resExponent << 23) | (resMantissa & 0x7FFFFF)); +} + +// Rounding can be slightly off: (PS2: 0x3F800000 / 0x3F800001 = 0x3F7FFFFF | SoftFloat/IEEE754: 0x3F800000 / 0x3F800001 = 0x3F7FFFFE). +PS2Float PS2Float::DoDiv(PS2Float other) +{ + u64 selfMantissa64; + u32 selfMantissa = Mantissa | 0x800000; + u32 otherMantissa = other.Mantissa | 0x800000; + s32 resExponent = Exponent - other.Exponent + BIAS; + + PS2Float result = PS2Float(0); + + result.Sign = DetermineMultiplicationDivisionOperationSign(*this, other); + + if (resExponent > 255) + return result.Sign ? Min() : Max(); + else if (resExponent <= 0) + return PS2Float(result.Sign, 0, 0); + + if (selfMantissa < otherMantissa) + { + --resExponent; + if (resExponent == 0) + return PS2Float(result.Sign, 0, 0); + selfMantissa64 = (u64)(selfMantissa) << 31; + } + else + { + selfMantissa64 = (u64)(selfMantissa) << 30; + } + + u32 resMantissa = (u32)(selfMantissa64 / otherMantissa); + if ((resMantissa & 0x3F) == 0) + resMantissa |= ((u64)(otherMantissa)*resMantissa != selfMantissa64) ? 1U : 0; + + result.Exponent = (u8)(resExponent); + result.Mantissa = (resMantissa + 0x40U) >> 7; + + if (result.Mantissa > 0) + { + s32 leadingBitPosition = PS2Float::GetMostSignificantBitPosition(result.Mantissa); + + while (leadingBitPosition != IMPLICIT_LEADING_BIT_POS) + { + if (leadingBitPosition > IMPLICIT_LEADING_BIT_POS) + { + result.Mantissa >>= 1; + + s32 exp = ((s32)result.Exponent + 1); + + if (exp > 255) + return result.Sign ? Min() : Max(); + + result.Exponent = (u8)exp; + + leadingBitPosition--; + } + else if (leadingBitPosition < IMPLICIT_LEADING_BIT_POS) + { + result.Mantissa <<= 1; + + s32 exp = ((s32)result.Exponent - 1); + + if (exp <= 0) + return PS2Float(result.Sign, 0, 0); + + result.Exponent = (u8)exp; + + leadingBitPosition++; + } + } + } + + result.Mantissa &= 0x7FFFFF; + return result.RoundTowardsZero(); +} + +PS2Float PS2Float::SolveAbnormalAdditionOrSubtractionOperation(PS2Float a, PS2Float b, bool add) +{ + u32 aval = a.AsUInt32(); + u32 bval = b.AsUInt32(); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return add ? Max() : PS2Float(0); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return add ? Min() : PS2Float(0); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return add ? PS2Float(0) : Min(); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return add ? PS2Float(0) : Max(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return add ? Max() : PS2Float(0); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return add ? PS2Float(0) : Min(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return add ? PS2Float(0) : Max(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return add ? Min() : PS2Float(0); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return add ? Max() : PS2Float(0x7F7FFFFE); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return add ? PS2Float(0x7F7FFFFE) : Max(); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return add ? PS2Float(0xFF7FFFFE) : Min(); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return add ? Min() : PS2Float(0xFF7FFFFE); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return add ? Max() : PS2Float(0xFF7FFFFE); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return add ? PS2Float(0xFF7FFFFE) : Max(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return add ? PS2Float(0x7F7FFFFE) : Min(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return add ? Min() : PS2Float(0x7F7FFFFE); + + Console.Error("Unhandled abnormal add/sub floating point operation"); + + return PS2Float(0); +} + +PS2Float PS2Float::SolveAbnormalMultiplicationOrDivisionOperation(PS2Float a, PS2Float b, bool mul) +{ + u32 aval = a.AsUInt32(); + u32 bval = b.AsUInt32(); + + if (mul) + { + if ((aval == MAX_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) || + (aval == MIN_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE)) + return Max(); + + if ((aval == MAX_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) || + (aval == MIN_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE)) + return Min(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return Max(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return Min(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return Min(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return Max(); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return Max(); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return Min(); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return Min(); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return Max(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return Max(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return Min(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return Min(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return Max(); + } + else + { + if ((aval == MAX_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) || + (aval == MIN_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE)) + return One(); + + if ((aval == MAX_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) || + (aval == MIN_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE)) + return MinOne(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return One(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return MinOne(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return MinOne(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return One(); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return PS2Float(0x3FFFFFFF); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return PS2Float(0xBFFFFFFF); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return PS2Float(0xBFFFFFFF); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return PS2Float(0x3FFFFFFF); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return PS2Float(0x3F000001); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return PS2Float(0xBF000001); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return PS2Float(0xBF000001); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return PS2Float(0x3F000001); + } + + Console.Error("Unhandled abnormal mul/div floating point operation"); + + return PS2Float(0); +} + +PS2Float PS2Float::SolveAddSubDenormalizedOperation(PS2Float a, PS2Float b, bool add) +{ + PS2Float result = PS2Float(0); + + if (a.IsDenormalized() && !b.IsDenormalized()) + result = b; + else if (!a.IsDenormalized() && b.IsDenormalized()) + result = a; + else if (a.IsDenormalized() && b.IsDenormalized()) + { + } + else + Console.Error("Both numbers are not denormalized"); + + result.Sign = add ? DetermineAdditionOperationSign(a, b) : DetermineSubtractionOperationSign(a, b); + return result; +} + +PS2Float PS2Float::SolveMultiplicationDenormalizedOperation(PS2Float a, PS2Float b) +{ + PS2Float result = PS2Float(0); + + result.Sign = DetermineMultiplicationDivisionOperationSign(a, b); + return result; +} + +PS2Float PS2Float::SolveDivisionDenormalizedOperation(PS2Float a, PS2Float b) +{ + bool sign = DetermineMultiplicationDivisionOperationSign(a, b); + PS2Float result = PS2Float(0); + + if (a.IsDenormalized() && !b.IsDenormalized()) + { + } + else if (!a.IsDenormalized() && b.IsDenormalized()) + return sign ? Min() : Max(); + else if (a.IsDenormalized() && b.IsDenormalized()) + return sign ? Min() : Max(); + else + Console.Error("Both numbers are not denormalized"); + + result.Sign = sign; + return result; +} + +PS2Float PS2Float::Neg(PS2Float self) +{ + return PS2Float(self.AsUInt32() ^ SIGNMASK); +} + +bool PS2Float::DetermineMultiplicationDivisionOperationSign(PS2Float a, PS2Float b) +{ + return a.Sign ^ b.Sign; +} + +bool PS2Float::DetermineAdditionOperationSign(PS2Float a, PS2Float b) +{ + if (a.IsZero() && b.IsZero()) + { + if (!a.Sign || !b.Sign) + return false; + else if (a.Sign && b.Sign) + return true; + else + Console.Error("Unhandled addition operation flags"); + } + + return a.CompareOperand(b) >= 0 ? a.Sign : b.Sign; +} + +bool PS2Float::DetermineSubtractionOperationSign(PS2Float a, PS2Float b) +{ + if (a.IsZero() && b.IsZero()) + { + if (!a.Sign || b.Sign) + return false; + else if (a.Sign && !b.Sign) + return true; + else + Console.Error("Unhandled subtraction operation flags"); + } + + return a.CompareOperand(b) >= 0 ? a.Sign : !b.Sign; +} + +s32 PS2Float::clz(s32 x) +{ + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + + return debruijn32[(u32)x * 0x8c0b2891u >> 26]; +} + +s32 PS2Float::BitScanReverse8(s32 b) +{ + return msb[b]; +} + +s32 PS2Float::GetMostSignificantBitPosition(u32 value) +{ + for (s32 i = 31; i >= 0; i--) + { + if (((value >> i) & 1) != 0) + return i; + } + return -1; +} diff --git a/pcsx2/PS2Float.h b/pcsx2/PS2Float.h new file mode 100644 index 0000000000000..341bc10b56c0e --- /dev/null +++ b/pcsx2/PS2Float.h @@ -0,0 +1,142 @@ +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#pragma once + +#include + +class PS2Float +{ + struct BoothRecode + { + u32 data; + u32 negate; + }; + + struct AddResult + { + u32 lo; + u32 hi; + }; + + static u64 MulMantissa(u32 a, u32 b); + + static BoothRecode Booth(u32 a, u32 b, u32 bit); + + static AddResult Add3(u32 a, u32 b, u32 c); + +public: + bool Sign; + u8 Exponent; + u32 Mantissa; + + static constexpr u8 BIAS = 127; + static constexpr u32 SIGNMASK = 0x80000000; + static constexpr u32 MAX_FLOATING_POINT_VALUE = 0x7FFFFFFF; + static constexpr u32 MIN_FLOATING_POINT_VALUE = 0xFFFFFFFF; + static constexpr u32 POSITIVE_INFINITY_VALUE = 0x7F800000; + static constexpr u32 NEGATIVE_INFINITY_VALUE = 0xFF800000; + static constexpr u32 ONE = 0x3F800000; + static constexpr u32 MIN_ONE = 0xBF800000; + static constexpr int IMPLICIT_LEADING_BIT_POS = 23; + + static constexpr s8 msb[256] = { + -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 + }; + + static constexpr s32 debruijn32[64] = { + 32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24, + 4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9, + -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12 + }; + + static constexpr s32 normalizeAmounts[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24 + }; + + PS2Float(u32 value); + + PS2Float(bool sign, u8 exponent, u32 mantissa); + + static PS2Float Max(); + + static PS2Float Min(); + + static PS2Float One(); + + static PS2Float MinOne(); + + static PS2Float Neg(PS2Float self); + + u32 AsUInt32() const; + + PS2Float Add(PS2Float addend); + + PS2Float Sub(PS2Float subtrahend); + + PS2Float Mul(PS2Float mulend); + + PS2Float Div(PS2Float divend); + + PS2Float Sqrt(); + + PS2Float Rsqrt(PS2Float other); + + bool IsDenormalized(); + + bool IsAbnormal(); + + bool IsZero(); + + u32 Abs(); + + PS2Float RoundTowardsZero(); + + s32 CompareTo(PS2Float other); + + s32 CompareOperand(PS2Float other); + + double ToDouble(); + + std::string ToString(); + +protected: + +private: + + PS2Float DoAdd(PS2Float other); + + PS2Float DoMul(PS2Float other); + + PS2Float DoDiv(PS2Float other); + + static PS2Float SolveAbnormalAdditionOrSubtractionOperation(PS2Float a, PS2Float b, bool add); + + static PS2Float SolveAbnormalMultiplicationOrDivisionOperation(PS2Float a, PS2Float b, bool mul); + + static PS2Float SolveAddSubDenormalizedOperation(PS2Float a, PS2Float b, bool add); + + static PS2Float SolveMultiplicationDenormalizedOperation(PS2Float a, PS2Float b); + + static PS2Float SolveDivisionDenormalizedOperation(PS2Float a, PS2Float b); + + static bool DetermineMultiplicationDivisionOperationSign(PS2Float a, PS2Float b); + + static bool DetermineAdditionOperationSign(PS2Float a, PS2Float b); + + static bool DetermineSubtractionOperationSign(PS2Float a, PS2Float b); + + static s32 GetMostSignificantBitPosition(u32 value); + + static s32 BitScanReverse8(s32 b); + + static s32 clz(s32 x); +}; diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index 071ff140ebae2..4c3f70a573fdf 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -536,14 +536,27 @@ void Pcsx2Config::RecompilerOptions::LoadSave(SettingsWrapper& wrap) SettingsWrapBitBool(vu0ExtraOverflow); SettingsWrapBitBool(vu0SignOverflow); SettingsWrapBitBool(vu0Underflow); + + SettingsWrapBitBool(vu0SoftAddSub); + SettingsWrapBitBool(vu0SoftMulDiv); + SettingsWrapBitBool(vu0SoftSqrt); + SettingsWrapBitBool(vu1Overflow); SettingsWrapBitBool(vu1ExtraOverflow); SettingsWrapBitBool(vu1SignOverflow); SettingsWrapBitBool(vu1Underflow); + + SettingsWrapBitBool(vu1SoftAddSub); + SettingsWrapBitBool(vu1SoftMulDiv); + SettingsWrapBitBool(vu1SoftSqrt); SettingsWrapBitBool(fpuOverflow); SettingsWrapBitBool(fpuExtraOverflow); SettingsWrapBitBool(fpuFullMode); + + SettingsWrapBitBool(fpuSoftAddSub); + SettingsWrapBitBool(fpuSoftMulDiv); + SettingsWrapBitBool(fpuSoftSqrt); } u32 Pcsx2Config::RecompilerOptions::GetEEClampMode() const diff --git a/pcsx2/VU.h b/pcsx2/VU.h index 1f8224bc396b5..72b519c8fa932 100644 --- a/pcsx2/VU.h +++ b/pcsx2/VU.h @@ -124,6 +124,7 @@ struct alignas(16) VURegs REG_VI q; REG_VI p; + VECTOR TMP; // Temporary vector used to stack FMA operations uint idx; // VU index (0 or 1) // flags/cycle are needed by VIF dma code, so they have to be here (for now) diff --git a/pcsx2/VUflags.cpp b/pcsx2/VUflags.cpp index 22632cf36b613..d3422c2587998 100644 --- a/pcsx2/VUflags.cpp +++ b/pcsx2/VUflags.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: GPL-3.0+ #include "Common.h" - +#include "PS2Float.h" #include #include @@ -12,21 +12,22 @@ /* NEW FLAGS */ //By asadr. Thnkx F|RES :p /*****************************************/ -static __ri u32 VU_MAC_UPDATE( int shift, VURegs * VU, float f ) +static __ri u32 VU_MAC_UPDATE(int shift, VURegs* VU, u32 f) { - u32 v = *(u32*)&f; - int exp = (v >> 23) & 0xff; - u32 s = v & 0x80000000; + PS2Float ps2f = PS2Float(f); + + u32 exp = ps2f.Exponent; + u32 s = ps2f.AsUInt32() & PS2Float::SIGNMASK; if (s) VU->macflag |= 0x0010<macflag &= ~(0x0010<macflag = (VU->macflag & ~(0x1100<macflag = (VU->macflag&~(0x1000<macflag = (VU->macflag&~(0x0101<macflag = (VU->macflag & ~(0x0101 << shift)) | (0x1000 << shift); + return f; + } + else + return f; + } + else if (CHECK_VU_OVERFLOW((VU == &VU1) ? 1 : 0)) + { + VU->macflag = (VU->macflag & ~(0x0101 << shift)) | (0x1000 << shift); + return s | 0x7f7fffff; /* max IEEE754 allowed */ + } else - return v; + { + VU->macflag = (VU->macflag & ~(0x0101 << shift)) | (0x1000 << shift); + return f; + } default: VU->macflag = (VU->macflag & ~(0x1101<macflag&= ~(0x1111<<3); } -__fi void VU_MACy_CLEAR(VURegs * VU) +__fi void VU_MACy_CLEAR(VURegs* VU) { VU->macflag&= ~(0x1111<<2); } -__fi void VU_MACz_CLEAR(VURegs * VU) +__fi void VU_MACz_CLEAR(VURegs* VU) { VU->macflag&= ~(0x1111<<1); } -__fi void VU_MACw_CLEAR(VURegs * VU) +__fi void VU_MACw_CLEAR(VURegs* VU) { VU->macflag&= ~(0x1111<<0); } diff --git a/pcsx2/VUflags.h b/pcsx2/VUflags.h index 86e844ae628f0..12bd316351270 100644 --- a/pcsx2/VUflags.h +++ b/pcsx2/VUflags.h @@ -4,12 +4,12 @@ #pragma once #include "VU.h" -extern u32 VU_MACx_UPDATE(VURegs * VU, float x); -extern u32 VU_MACy_UPDATE(VURegs * VU, float y); -extern u32 VU_MACz_UPDATE(VURegs * VU, float z); -extern u32 VU_MACw_UPDATE(VURegs * VU, float w); -extern void VU_MACx_CLEAR(VURegs * VU); -extern void VU_MACy_CLEAR(VURegs * VU); -extern void VU_MACz_CLEAR(VURegs * VU); -extern void VU_MACw_CLEAR(VURegs * VU); -extern void VU_STAT_UPDATE(VURegs * VU); +extern u32 VU_MACx_UPDATE(VURegs* VU, u32 x); +extern u32 VU_MACy_UPDATE(VURegs* VU, u32 y); +extern u32 VU_MACz_UPDATE(VURegs* VU, u32 z); +extern u32 VU_MACw_UPDATE(VURegs* VU, u32 w); +extern void VU_MACx_CLEAR(VURegs* VU); +extern void VU_MACy_CLEAR(VURegs* VU); +extern void VU_MACz_CLEAR(VURegs* VU); +extern void VU_MACw_CLEAR(VURegs* VU); +extern void VU_STAT_UPDATE(VURegs* VU); diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index dfa777e8aeea5..04da937e62d0f 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-3.0+ #include "Common.h" +#include "PS2Float.h" #include "VUops.h" #include "GS.h" #include "Gif_Unit.h" @@ -462,34 +463,32 @@ static __fi float vuDouble(u32 f) } #endif -static __fi float vuADD_TriAceHack(u32 a, u32 b) +static __fi u32 vuAccurateAdd(VURegs* VU, u32 a, u32 b) { - // On VU0 TriAce Games use ADDi and expects these bit-perfect results: - //if (a == 0xb3e2a619 && b == 0x42546666) return vuDouble(0x42546666); - //if (a == 0x8b5b19e9 && b == 0xc7f079b3) return vuDouble(0xc7f079b3); - //if (a == 0x4b1ed4a8 && b == 0x43a02666) return vuDouble(0x4b1ed5e7); - //if (a == 0x7d1ca47b && b == 0x42f23333) return vuDouble(0x7d1ca47b); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) return PS2Float(a).Add(PS2Float(b)).AsUInt32(); - // In the 3rd case, some other rounding error is giving us incorrect - // operands ('a' is wrong); and therefor an incorrect result. - // We're getting: 0x4b1ed4a8 + 0x43a02666 = 0x4b1ed5e8 - // We should be getting: 0x4b1ed4a7 + 0x43a02666 = 0x4b1ed5e7 - // microVU gets the correct operands and result. The interps likely - // don't get it due to rounding towards nearest in other calculations. + return std::bit_cast(vuDouble(a) + vuDouble(b)); +} + +static __fi u32 vuAccurateSub(VURegs* VU, u32 a, u32 b) +{ + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) return PS2Float(a).Sub(PS2Float(b)).AsUInt32(); + + return std::bit_cast(vuDouble(a) - vuDouble(b)); +} - // microVU uses something like this to get TriAce games working, - // but VU interpreters don't seem to need it currently: +static __fi u32 vuAccurateMul(VURegs* VU, u32 a, u32 b) +{ + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) return PS2Float(a).Mul(PS2Float(b)).AsUInt32(); - // Update Sept 2021, now the interpreters don't suck, they do - Refraction - s32 aExp = (a >> 23) & 0xff; - s32 bExp = (b >> 23) & 0xff; - if (aExp - bExp >= 25) b &= 0x80000000; - if (aExp - bExp <=-25) a &= 0x80000000; - float ret = vuDouble(a) + vuDouble(b); - //DevCon.WriteLn("aExp = %d, bExp = %d", aExp, bExp); - //DevCon.WriteLn("0x%08x + 0x%08x = 0x%08x", a, b, (u32&)ret); - //DevCon.WriteLn("%f + %f = %f", vuDouble(a), vuDouble(b), ret); - return ret; + return std::bit_cast(vuDouble(a) * vuDouble(b)); +} + +static __fi u32 vuAccurateDiv(VURegs* VU, u32 a, u32 b) +{ + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) return PS2Float(a).Div(PS2Float(b)).AsUInt32(); + + return std::bit_cast(vuDouble(a) / vuDouble(b)); } void _vuABS(VURegs* VU) @@ -497,10 +496,10 @@ void _vuABS(VURegs* VU) if (_Ft_ == 0) return; - if (_X){ VU->VF[_Ft_].f.x = fabs(vuDouble(VU->VF[_Fs_].i.x)); } - if (_Y){ VU->VF[_Ft_].f.y = fabs(vuDouble(VU->VF[_Fs_].i.y)); } - if (_Z){ VU->VF[_Ft_].f.z = fabs(vuDouble(VU->VF[_Fs_].i.z)); } - if (_W){ VU->VF[_Ft_].f.w = fabs(vuDouble(VU->VF[_Fs_].i.w)); } + if (_X) VU->VF[_Ft_].i.x = PS2Float(VU->VF[_Fs_].i.x).Abs(); + if (_Y) VU->VF[_Ft_].i.y = PS2Float(VU->VF[_Fs_].i.y).Abs(); + if (_Z) VU->VF[_Ft_].i.z = PS2Float(VU->VF[_Fs_].i.z).Abs(); + if (_W) VU->VF[_Ft_].i.w = PS2Float(VU->VF[_Fs_].i.w).Abs(); } @@ -512,10 +511,10 @@ static __fi void _vuADD(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -528,20 +527,11 @@ static __fi void _vuADDi(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (!CHECK_VUADDSUBHACK) { - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); - } - else { - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.x, VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.y, VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.z, VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.w, VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); - } + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); } static __fi void _vuADDq(VURegs* VU) @@ -552,153 +542,133 @@ static __fi void _vuADDq(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDx(VURegs* VU) { - float ftx; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ftx); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ftx); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ftx); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ftx); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDy(VURegs* VU) { - float fty; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + fty);} else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + fty);} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + fty);} else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + fty);} else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDz(VURegs* VU) { - float ftz; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ftz); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ftz); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ftz); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ftz); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDw(VURegs* VU) { - float ftw; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ftw); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ftw); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ftw); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ftw); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDA(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAi(VURegs* VU) { - float ti = vuDouble(VU->VI[REG_I].UL); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ti); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ti); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ti); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ti); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAq(VURegs* VU) { - float tf = vuDouble(VU->VI[REG_Q].UL); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + tf); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + tf); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tf); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tf); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAx(VURegs* VU) { - float tx = vuDouble(VU->VF[_Ft_].i.x); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + tx); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + tx); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tx); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tx); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAy(VURegs* VU) { - float ty = vuDouble(VU->VF[_Ft_].i.y); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ty); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ty); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ty); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ty); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAz(VURegs* VU) { - float tz = vuDouble(VU->VF[_Ft_].i.z); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + tz); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + tz); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tz); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tz); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAw(VURegs* VU) { - float tw = vuDouble(VU->VF[_Ft_].i.w); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + tw); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + tw); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tw); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tw); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -711,11 +681,11 @@ static __fi void _vuSUB(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); } static __fi void _vuSUBi(VURegs* VU) @@ -726,10 +696,10 @@ static __fi void _vuSUBi(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -741,147 +711,131 @@ static __fi void _vuSUBq(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBx(VURegs* VU) { - float ftx; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - ftx); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - ftx); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - ftx); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - ftx); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBy(VURegs* VU) { - float fty; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - fty); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - fty); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - fty); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - fty); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBz(VURegs* VU) { - float ftz; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - ftz); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - ftz); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - ftz); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - ftz); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBw(VURegs* VU) { - float ftw; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - ftw); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - ftw); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - ftw); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - ftw); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBA(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAi(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAq(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAx(VURegs* VU) { - float tx = vuDouble(VU->VF[_Ft_].i.x); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - tx); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - tx); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - tx); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - tx); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAy(VURegs* VU) { - float ty = vuDouble(VU->VF[_Ft_].i.y); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - ty); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - ty); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - ty); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - ty); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAz(VURegs* VU) { - float tz = vuDouble(VU->VF[_Ft_].i.z); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - tz); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - tz); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - tz); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - tz); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAw(VURegs* VU) { - float tw = vuDouble(VU->VF[_Ft_].i.w); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - tw); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - tw); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - tw); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - tw); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -893,10 +847,10 @@ static __fi void _vuMUL(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -908,10 +862,10 @@ static __fi void _vuMULi(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -923,498 +877,540 @@ static __fi void _vuMULq(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULx(VURegs* VU) { - float ftx; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * ftx); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * ftx); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * ftx); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * ftx); } else VU_MACw_CLEAR(VU); + u32 ftx = VU->VF[_Ft_].i.x; + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftx)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftx)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftx)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftx)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULy(VURegs* VU) { - float fty; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * fty); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * fty); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * fty); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * fty); } else VU_MACw_CLEAR(VU); + u32 fty = VU->VF[_Ft_].i.y; + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, fty)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, fty)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, fty)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, fty)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULz(VURegs* VU) { - float ftz; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * ftz); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * ftz); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * ftz); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * ftz); } else VU_MACw_CLEAR(VU); + u32 ftz = VU->VF[_Ft_].i.z; + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftz)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftz)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftz)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftz)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULw(VURegs* VU) { - float ftw; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * ftw); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * ftw); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * ftw); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * ftw); } else VU_MACw_CLEAR(VU); + u32 ftw = VU->VF[_Ft_].i.w; + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftw)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftw)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftw)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftw)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULA(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAi(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAq(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAx(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAy(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAz(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAw(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADD(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDi(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_I].UL))); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_I].UL))); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_I].UL))); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_I].UL))); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDq(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDx(VURegs* VU) { - float ftx; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * ftx)); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * ftx)); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * ftx)); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * ftx)); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + u32 ftx = VU->VF[_Ft_].i.x; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftx); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftx); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftx); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftx); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDy(VURegs* VU) { - float fty; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * fty)); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * fty)); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * fty)); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * fty)); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + u32 fty = VU->VF[_Ft_].i.y; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, fty); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, fty); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, fty); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, fty); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDz(VURegs* VU) { - float ftz; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * ftz)); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * ftz)); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * ftz)); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * ftz)); else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); + tmp = &VU->TMP; + u32 ftz = VU->VF[_Ft_].i.z; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftz); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftz); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftz); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftz); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); } static __fi void _vuMADDw(VURegs* VU) { - float ftw; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * ftw)); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * ftw)); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * ftw)); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * ftw)); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + u32 ftw = VU->VF[_Ft_].i.w; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftw); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftw); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftw); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftw); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDA(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDA(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDAi(VURegs* VU) { - float ti = vuDouble(VU->VI[REG_I].UL); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * ti)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * ti)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * ti)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * ti)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDAq(VURegs* VU) { - float tq = vuDouble(VU->VI[REG_Q].UL); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * tq)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * tq)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * tq)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * tq)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDAx(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDAx(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDAy(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDAy(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDAz(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDAz(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDAw(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDAw(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMSUB(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } + static __fi void _vuMSUBi(VURegs* VU) { - float ti = vuDouble(VU->VI[REG_I].UL); + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ti ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ti ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ti ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ti ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMSUBq(VURegs* VU) { - float tq = vuDouble(VU->VI[REG_Q].UL); + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * tq ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * tq ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * tq ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * tq ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } - static __fi void _vuMSUBx(VURegs* VU) { - float ftx; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ftx ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ftx ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ftx ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ftx ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + u32 ftx = VU->VF[_Ft_].i.x; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftx); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftx); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftx); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftx); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } - static __fi void _vuMSUBy(VURegs* VU) { - float fty; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * fty ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * fty ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * fty ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * fty ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + u32 fty = VU->VF[_Ft_].i.y; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, fty); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, fty); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, fty); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, fty); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } - static __fi void _vuMSUBz(VURegs* VU) { - float ftz; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ftz ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ftz ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ftz ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ftz ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + u32 ftz = VU->VF[_Ft_].i.z; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftz); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftz); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftz); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftz); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMSUBw(VURegs* VU) { - float ftw; - VECTOR * dst; - if (_Fd_ == 0) dst = &RDzero; - else dst = &VU->VF[_Fd_]; + VECTOR* tmp; + VECTOR* dst; + if (_Fd_ == 0) + dst = &RDzero; + else + dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ftw ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ftw ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ftw ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ftw ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + u32 ftw = VU->VF[_Ft_].i.w; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftw); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftw); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftw); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftw); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } - -static __fi void _vuMSUBA(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); +static __fi void _vuMSUBA(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMSUBAi(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_I].UL))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_I].UL))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_I].UL))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_I].UL))); else VU_MACw_CLEAR(VU); +static __fi void _vuMSUBAi(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMSUBAq(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACw_CLEAR(VU); +static __fi void _vuMSUBAq(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMSUBAx(VURegs* VU) -{ - float tx = vuDouble(VU->VF[_Ft_].i.x); - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * tx)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * tx)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * tx)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * tx)); else VU_MACw_CLEAR(VU); +static __fi void _vuMSUBAx(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + u32 tx = VU->VF[_Ft_].i.x; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, tx); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, tx); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, tx); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, tx); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMSUBAy(VURegs* VU) +static __fi void _vuMSUBAy(VURegs* VU) { - float ty = vuDouble(VU->VF[_Ft_].i.y); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ty)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ty)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ty)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ty)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + u32 ty = VU->VF[_Ft_].i.y; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ty); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ty); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ty); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ty); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMSUBAz(VURegs* VU) +static __fi void _vuMSUBAz(VURegs* VU) { - float tz = vuDouble(VU->VF[_Ft_].i.z); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * tz)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * tz)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * tz)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * tz)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + u32 tz = VU->VF[_Ft_].i.z; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, tz); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, tz); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, tz); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, tz); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMSUBAw(VURegs* VU) +static __fi void _vuMSUBAw(VURegs* VU) { - float tw = vuDouble(VU->VF[_Ft_].i.w); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * tw)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * tw)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * tw)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * tw)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + u32 tw = VU->VF[_Ft_].i.w; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, tw); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, tw); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, tw); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, tw); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1575,32 +1571,28 @@ static __fi void _vuMINIw(VURegs* VU) static __fi void _vuOPMULA(VURegs* VU) { - VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.z)); - VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.x)); - VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.y)); + VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); + VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); + VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); VU_STAT_UPDATE(VU); } static __fi void _vuOPMSUB(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; - float ftx, fty, ftz; - float fsx, fsy, fsz; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx = vuDouble(VU->VF[_Ft_].i.x); - fty = vuDouble(VU->VF[_Ft_].i.y); - ftz = vuDouble(VU->VF[_Ft_].i.z); - fsx = vuDouble(VU->VF[_Fs_].i.x); - fsy = vuDouble(VU->VF[_Fs_].i.y); - fsz = vuDouble(VU->VF[_Fs_].i.z); - - dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - fsy * ftz); - dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - fsz * ftx); - dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - fsx * fty); + tmp = &VU->TMP; + tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z); + tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x); + tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y); + dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x)); + dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y)); + dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z)); VU_STAT_UPDATE(VU); } @@ -1617,13 +1609,40 @@ static __fi s32 float_to_int(float value) return value; } +static __fi s32 double_to_int(double value) +{ + if (value >= 2147483647.0) + return 2147483647LL; + if (value <= -2147483648.0) + return -2147483648LL; + return value; +} + static __fi void _vuFTOI0(VURegs* VU) { if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].SL[0] = float_to_int(vuDouble(VU->VF[_Fs_].i.x)); - if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(vuDouble(VU->VF[_Fs_].i.y)); - if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(vuDouble(VU->VF[_Fs_].i.z)); - if (_W) VU->VF[_Ft_].SL[3] = float_to_int(vuDouble(VU->VF[_Fs_].i.w)); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + if (_X) + VU->VF[_Ft_].SL[0] = double_to_int(PS2Float(VU->VF[_Fs_].i.x).ToDouble()); + if (_Y) + VU->VF[_Ft_].SL[1] = double_to_int(PS2Float(VU->VF[_Fs_].i.y).ToDouble()); + if (_Z) + VU->VF[_Ft_].SL[2] = double_to_int(PS2Float(VU->VF[_Fs_].i.z).ToDouble()); + if (_W) + VU->VF[_Ft_].SL[3] = double_to_int(PS2Float(VU->VF[_Fs_].i.w).ToDouble()); + } + else + { + if (_X) + VU->VF[_Ft_].SL[0] = float_to_int(vuDouble(VU->VF[_Fs_].i.x)); + if (_Y) + VU->VF[_Ft_].SL[1] = float_to_int(vuDouble(VU->VF[_Fs_].i.y)); + if (_Z) + VU->VF[_Ft_].SL[2] = float_to_int(vuDouble(VU->VF[_Fs_].i.z)); + if (_W) + VU->VF[_Ft_].SL[3] = float_to_int(vuDouble(VU->VF[_Fs_].i.w)); + } } static __fi void _vuFTOI4(VURegs* VU) { @@ -1703,15 +1722,31 @@ static __fi void _vuITOF15(VURegs* VU) static __fi void _vuCLIP(VURegs* VU) { - float value = fabs(vuDouble(VU->VF[_Ft_].i.w)); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + double value = PS2Float(PS2Float(VU->VF[_Ft_].i.w).Abs()).ToDouble(); + + VU->clipflag <<= 6; + if (PS2Float(VU->VF[_Fs_].i.x).ToDouble() > +value) VU->clipflag |= 0x01; + if (PS2Float(VU->VF[_Fs_].i.x).ToDouble() < -value) VU->clipflag |= 0x02; + if (PS2Float(VU->VF[_Fs_].i.y).ToDouble() > +value) VU->clipflag |= 0x04; + if (PS2Float(VU->VF[_Fs_].i.y).ToDouble() < -value) VU->clipflag |= 0x08; + if (PS2Float(VU->VF[_Fs_].i.z).ToDouble() > +value) VU->clipflag |= 0x10; + if (PS2Float(VU->VF[_Fs_].i.z).ToDouble() < -value) VU->clipflag |= 0x20; + } + else + { + float value = fabs(vuDouble(VU->VF[_Ft_].i.w)); + + VU->clipflag <<= 6; + if (vuDouble(VU->VF[_Fs_].i.x) > +value) VU->clipflag |= 0x01; + if (vuDouble(VU->VF[_Fs_].i.x) < -value) VU->clipflag |= 0x02; + if (vuDouble(VU->VF[_Fs_].i.y) > +value) VU->clipflag |= 0x04; + if (vuDouble(VU->VF[_Fs_].i.y) < -value) VU->clipflag |= 0x08; + if (vuDouble(VU->VF[_Fs_].i.z) > +value) VU->clipflag |= 0x10; + if (vuDouble(VU->VF[_Fs_].i.z) < -value) VU->clipflag |= 0x20; + } - VU->clipflag <<= 6; - if ( vuDouble(VU->VF[_Fs_].i.x) > +value ) VU->clipflag|= 0x01; - if ( vuDouble(VU->VF[_Fs_].i.x) < -value ) VU->clipflag|= 0x02; - if ( vuDouble(VU->VF[_Fs_].i.y) > +value ) VU->clipflag|= 0x04; - if ( vuDouble(VU->VF[_Fs_].i.y) < -value ) VU->clipflag|= 0x08; - if ( vuDouble(VU->VF[_Fs_].i.z) > +value ) VU->clipflag|= 0x10; - if ( vuDouble(VU->VF[_Fs_].i.z) < -value ) VU->clipflag|= 0x20; VU->clipflag = VU->clipflag & 0xFFFFFF; } @@ -1721,84 +1756,175 @@ static __fi void _vuCLIP(VURegs* VU) static __fi void _vuDIV(VURegs* VU) { - float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); - float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + { + PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); + PS2Float fs = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); - VU->statusflag &= ~0x30; + VU->statusflag &= ~0x30; - if (ft == 0.0) - { - if (fs == 0.0) - VU->statusflag |= 0x10; - else - VU->statusflag |= 0x20; + if (ft.IsZero()) + { + if (fs.IsZero()) + VU->statusflag |= 0x10; + else + VU->statusflag |= 0x20; - if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ - (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) - VU->q.UL = 0xFF7FFFFF; + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = PS2Float::MIN_FLOATING_POINT_VALUE; + else + VU->q.UL = PS2Float::MAX_FLOATING_POINT_VALUE; + } else - VU->q.UL = 0x7F7FFFFF; + { + VU->q.UL = fs.Div(ft).AsUInt32(); + } } else { - VU->q.F = fs / ft; - VU->q.F = vuDouble(VU->q.UL); + float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + + VU->statusflag &= ~0x30; + + if (ft == 0.0) + { + if (fs == 0.0) + VU->statusflag |= 0x10; + else + VU->statusflag |= 0x20; + + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0xFF7FFFFF; + else + VU->q.UL = 0x7F7FFFFF; + } + else + { + VU->q.F = fs / ft; + VU->q.F = vuDouble(VU->q.UL); + } } } static __fi void _vuSQRT(VURegs* VU) { - float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); - VU->statusflag &= ~0x30; + VU->statusflag &= ~0x30; - if (ft < 0.0) - VU->statusflag |= 0x10; - VU->q.F = sqrt(fabs(ft)); - VU->q.F = vuDouble(VU->q.UL); + if (ft.ToDouble() < 0.0) + VU->statusflag |= 0x10; + VU->q.UL = PS2Float(ft.Abs()).Sqrt().AsUInt32(); + } + else + { + float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + + VU->statusflag &= ~0x30; + + if (ft < 0.0) + VU->statusflag |= 0x10; + VU->q.F = sqrt(fabs(ft)); + VU->q.F = vuDouble(VU->q.UL); + } } static __fi void _vuRSQRT(VURegs* VU) { - float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); - float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - float temp; - - VU->statusflag &= ~0x30; - - if (ft == 0.0) + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - VU->statusflag |= 0x20; + PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); + PS2Float fs = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); - if (fs != 0) + VU->statusflag &= ~0x30; + + if (ft.IsZero()) { - if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ - (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) - VU->q.UL = 0xFF7FFFFF; + VU->statusflag |= 0x20; + + if (!fs.IsZero()) + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = PS2Float::MIN_FLOATING_POINT_VALUE; + else + VU->q.UL = PS2Float::MAX_FLOATING_POINT_VALUE; + } else - VU->q.UL = 0x7F7FFFFF; + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0x80000000; + else + VU->q.UL = 0; + + VU->statusflag |= 0x10; + } } else { - if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ - (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) - VU->q.UL = 0x80000000; - else - VU->q.UL = 0; + if (ft.ToDouble() < 0.0) + { + VU->statusflag |= 0x10; + } - VU->statusflag |= 0x10; + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + VU->q.UL = fs.Div(PS2Float(ft.Abs()).Sqrt()).AsUInt32(); + else + { + float temp = sqrt(fabs(vuDouble(ft.AsUInt32()))); + VU->q.F = vuDouble(fs.AsUInt32()) / temp; + VU->q.F = vuDouble(VU->q.UL); + } } } else { - if (ft < 0.0) + float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + float temp; + + VU->statusflag &= ~0x30; + + if (ft == 0.0) { - VU->statusflag |= 0x10; + VU->statusflag |= 0x20; + + if (fs != 0) + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0xFF7FFFFF; + else + VU->q.UL = 0x7F7FFFFF; + } + else + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0x80000000; + else + VU->q.UL = 0; + + VU->statusflag |= 0x10; + } } + else + { + if (ft < 0.0) + { + VU->statusflag |= 0x10; + } - temp = sqrt(fabs(ft)); - VU->q.F = fs / temp; - VU->q.F = vuDouble(VU->q.UL); + temp = sqrt(fabs(ft)); + VU->q.F = fs / temp; + VU->q.F = vuDouble(VU->q.UL); + } } } @@ -2442,56 +2568,118 @@ static __ri void _vuWAITP(VURegs* VU) static __ri void _vuESADD(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + u32 x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x); + u32 y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y); + u32 z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z); - VU->p.F = p; + VU->p.UL = vuAccurateAdd(VU, vuAccurateAdd(VU, x, y), z); } static __ri void _vuERSADD(VURegs* VU) { - float p = (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x)) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y)) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z)); + u32 x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x); + u32 y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y); + u32 z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z); - if (p != 0.0) - p = 1.0f / p; + PS2Float p = PS2Float(vuAccurateAdd(VU, vuAccurateAdd(VU, x, y), z)); - VU->p.F = p; + if (!p.IsZero()) + { + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + p = PS2Float::One().Div(p); + else + { + VU->p.F = 1.0f / vuDouble(p.AsUInt32()); + return; + } + } + + VU->p.UL = p.AsUInt32(); } static __ri void _vuELENG(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + u32 x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x); + u32 y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y); + u32 z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z); - if (p >= 0) + PS2Float value = PS2Float(vuAccurateAdd(VU, vuAccurateAdd(VU, x, y), z)); + + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - p = sqrt(p); + if (value.ToDouble() >= 0) + { + value = value.Sqrt(); + } + VU->p.UL = value.AsUInt32(); + } + else + { + float p = vuDouble(value.AsUInt32()); + + if (p >= 0) + { + p = sqrt(p); + } + VU->p.F = p; } - VU->p.F = p; } static __ri void _vuERLENG(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + u32 x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x); + u32 y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y); + u32 z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z); + + PS2Float value = PS2Float(vuAccurateAdd(VU, vuAccurateAdd(VU, x, y), z)); - if (p >= 0) + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - p = sqrt(p); - if (p != 0) + if (value.ToDouble() >= 0) { - p = 1.0f / p; + value = value.Sqrt(); + if (!value.IsZero()) + { + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + { + value = PS2Float::One().Div(value); + } + else + { + VU->p.F = 1.0 / vuDouble(value.AsUInt32()); + return; + } + } } + VU->p.UL = value.AsUInt32(); + } + else + { + float p = vuDouble(value.AsUInt32()); + + if (p >= 0) + { + p = sqrt(p); + if (p != 0) + { + p = 1.0f / p; + } + } + VU->p.F = p; } - VU->p.F = p; } -static __ri float _vuCalculateEATAN(float inputvalue) { +static __ri float _vuCalculateEATAN(u32 inputvalue) { + + float fvalue = vuDouble(inputvalue); + float eatanconst[9] = { 0.999999344348907f, -0.333298563957214f, 0.199465364217758f, -0.13085337519646f, 0.096420042216778f, -0.055909886956215f, 0.021861229091883f, -0.004054057877511f, 0.785398185253143f }; - float result = (eatanconst[0] * inputvalue) + (eatanconst[1] * pow(inputvalue, 3)) + (eatanconst[2] * pow(inputvalue, 5)) - + (eatanconst[3] * pow(inputvalue, 7)) + (eatanconst[4] * pow(inputvalue, 9)) + (eatanconst[5] * pow(inputvalue, 11)) - + (eatanconst[6] * pow(inputvalue, 13)) + (eatanconst[7] * pow(inputvalue, 15)); + float result = (eatanconst[0] * fvalue) + (eatanconst[1] * pow(fvalue, 3)) + (eatanconst[2] * pow(fvalue, 5)) + (eatanconst[3] * pow(fvalue, 7)) + + (eatanconst[4] * pow(fvalue, 9)) + (eatanconst[5] * pow(fvalue, 11)) + (eatanconst[6] * pow(fvalue, 13)) + (eatanconst[7] * pow(fvalue, 15)); result += eatanconst[8]; @@ -2502,16 +2690,16 @@ static __ri float _vuCalculateEATAN(float inputvalue) { static __ri void _vuEATAN(VURegs* VU) { - float p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].UL[_Fsf_])); + float p = _vuCalculateEATAN(VU->VF[_Fs_].UL[_Fsf_]); VU->p.F = p; } static __ri void _vuEATANxy(VURegs* VU) { float p = 0; - if (vuDouble(VU->VF[_Fs_].i.x) != 0) + if (!PS2Float(VU->VF[_Fs_].i.x).IsZero()) { - p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].i.y) / vuDouble(VU->VF[_Fs_].i.x)); + p = _vuCalculateEATAN(vuAccurateDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.x)); } VU->p.F = p; } @@ -2519,57 +2707,104 @@ static __ri void _vuEATANxy(VURegs* VU) static __ri void _vuEATANxz(VURegs* VU) { float p = 0; - if (vuDouble(VU->VF[_Fs_].i.x) != 0) + if (!PS2Float(VU->VF[_Fs_].i.x).IsZero()) { - p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].i.z) / vuDouble(VU->VF[_Fs_].i.x)); + p = _vuCalculateEATAN(vuAccurateDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.x)); } VU->p.F = p; } static __ri void _vuESUM(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Fs_].i.w); - VU->p.F = p; + VU->p.UL = vuAccurateAdd(VU, vuAccurateAdd(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.y), VU->VF[_Fs_].i.z), VU->VF[_Fs_].i.w); } static __ri void _vuERCPR(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + PS2Float p = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); - if (p != 0) + if (!p.IsZero()) { - p = 1.0 / p; + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + { + p = PS2Float::One().Div(p); + } + else + { + VU->p.F = 1.0 / vuDouble(p.AsUInt32()); + return; + } } - VU->p.F = p; + VU->p.UL = p.AsUInt32(); } static __ri void _vuESQRT(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - if (p >= 0) + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - p = sqrt(p); + PS2Float value = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); + + if (value.ToDouble() >= 0) + { + value = value.Sqrt(); + } + + VU->p.UL = value.AsUInt32(); } + else + { + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - VU->p.F = p; + if (p >= 0) + { + p = sqrt(p); + } + + VU->p.F = p; + } } static __ri void _vuERSQRT(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - if (p >= 0) + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - p = sqrt(p); - if (p) + PS2Float value = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); + + if (value.ToDouble() >= 0) { - p = 1.0f / p; + value = value.Sqrt(); + if (!value.IsZero()) + { + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + { + VU->p.F = 1.0f / vuDouble(value.AsUInt32()); + return; + } + else + { + value = PS2Float::One().Div(value); + } + } } + + VU->p.UL = value.AsUInt32(); } + else + { + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - VU->p.F = p; + if (p >= 0) + { + p = sqrt(p); + if (p) + { + p = 1.0f / p; + } + } + + VU->p.F = p; + } } static __ri void _vuESIN(VURegs* VU) diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj index c7f68793cf98d..3a6555254b0a8 100644 --- a/pcsx2/pcsx2.vcxproj +++ b/pcsx2/pcsx2.vcxproj @@ -281,6 +281,7 @@ + @@ -726,6 +727,7 @@ + @@ -1025,4 +1027,4 @@ - + \ No newline at end of file diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index 51782a5fcf9d6..0fa8c4cd9fa53 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -289,6 +289,9 @@ {cd8ec519-2196-43f7-86de-7faced2d4296} + + {9a40984b-cb23-4a54-a5e9-9c54f3c16c5b} + @@ -1443,6 +1446,9 @@ System\Ps2\Iop\SIO\PAD + + System\Ps2\EmotionEngine\Shared + @@ -2399,6 +2405,9 @@ System\Ps2\Iop\SIO\PAD + + System\Ps2\EmotionEngine\Shared + @@ -2428,4 +2437,4 @@ System\Ps2\GS - + \ No newline at end of file