From 721995485a060e1fc87754689fe801d3beec5db0 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 23 Feb 2023 08:47:22 -0800 Subject: [PATCH] Adding more SIMD constant folding support (#82190) * Adding more SIMD constant folding support * Adding tests for the new SIMD constant folding paths * Ensure bitcasting float/double is using well-defined behavior --- src/coreclr/jit/simd.h | 172 ++- src/coreclr/jit/utils.cpp | 64 + src/coreclr/jit/utils.h | 8 + src/coreclr/jit/valuenum.cpp | 1027 +++++++++++++++-- src/coreclr/jit/valuenum.h | 12 +- .../ConstantFolding/SimdConstantFoldings.cs | 504 +++++++- 6 files changed, 1687 insertions(+), 100 deletions(-) diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 526c032480708..c1a0dd629ed5f 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -149,6 +149,39 @@ struct simd32_t } }; +template +TBase EvaluateUnaryScalarSpecialized(genTreeOps oper, TBase arg0) +{ + switch (oper) + { + case GT_NOT: + { + return ~arg0; + } + + default: + { + unreached(); + } + } +} + +template <> +inline float EvaluateUnaryScalarSpecialized(genTreeOps oper, float arg0) +{ + uint32_t arg0Bits = BitOperations::SingleToUInt32Bits(arg0); + uint32_t resultBits = EvaluateUnaryScalarSpecialized(oper, arg0Bits); + return BitOperations::UInt32BitsToSingle(resultBits); +} + +template <> +inline double EvaluateUnaryScalarSpecialized(genTreeOps oper, double arg0) +{ + uint64_t arg0Bits = BitOperations::DoubleToUInt64Bits(arg0); + uint64_t resultBits = EvaluateUnaryScalarSpecialized(oper, arg0Bits); + return BitOperations::UInt64BitsToDouble(resultBits); +} + template TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0) { @@ -161,7 +194,7 @@ TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0) default: { - unreached(); + return EvaluateUnaryScalarSpecialized(oper, arg0); } } } @@ -268,6 +301,119 @@ void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd* } } +template +TBase 
EvaluateBinaryScalarRSZ(TBase arg0, TBase arg1) +{ + return arg0 >> (arg1 & ((sizeof(TBase) * 8) - 1)); +} + +template <> +inline int8_t EvaluateBinaryScalarRSZ(int8_t arg0, int8_t arg1) +{ + uint8_t arg0Bits = static_cast(arg0); + uint8_t arg1Bits = static_cast(arg1); + + uint8_t resultBits = EvaluateBinaryScalarRSZ(arg0Bits, arg1Bits); + return static_cast(resultBits); +} + +template <> +inline int16_t EvaluateBinaryScalarRSZ(int16_t arg0, int16_t arg1) +{ + uint16_t arg0Bits = static_cast(arg0); + uint16_t arg1Bits = static_cast(arg1); + + uint16_t resultBits = EvaluateBinaryScalarRSZ(arg0Bits, arg1Bits); + return static_cast(resultBits); +} + +template <> +inline int32_t EvaluateBinaryScalarRSZ(int32_t arg0, int32_t arg1) +{ + uint32_t arg0Bits = static_cast(arg0); + uint32_t arg1Bits = static_cast(arg1); + + uint32_t resultBits = EvaluateBinaryScalarRSZ(arg0Bits, arg1Bits); + return static_cast(resultBits); +} + +template <> +inline int64_t EvaluateBinaryScalarRSZ(int64_t arg0, int64_t arg1) +{ + uint64_t arg0Bits = static_cast(arg0); + uint64_t arg1Bits = static_cast(arg1); + + uint64_t resultBits = EvaluateBinaryScalarRSZ(arg0Bits, arg1Bits); + return static_cast(resultBits); +} + +template +TBase EvaluateBinaryScalarSpecialized(genTreeOps oper, TBase arg0, TBase arg1) +{ + switch (oper) + { + case GT_AND: + { + return arg0 & arg1; + } + + case GT_AND_NOT: + { + return arg0 & ~arg1; + } + + case GT_LSH: + { + return arg0 << (arg1 & ((sizeof(TBase) * 8) - 1)); + } + + case GT_OR: + { + return arg0 | arg1; + } + + case GT_RSH: + { + return arg0 >> (arg1 & ((sizeof(TBase) * 8) - 1)); + } + + case GT_RSZ: + { + return EvaluateBinaryScalarRSZ(arg0, arg1); + } + + case GT_XOR: + { + return arg0 ^ arg1; + } + + default: + { + unreached(); + } + } +} + +template <> +inline float EvaluateBinaryScalarSpecialized(genTreeOps oper, float arg0, float arg1) +{ + uint32_t arg0Bits = BitOperations::SingleToUInt32Bits(arg0); + uint32_t arg1Bits = 
BitOperations::SingleToUInt32Bits(arg1); + + uint32_t resultBits = EvaluateBinaryScalarSpecialized(oper, arg0Bits, arg1Bits); + return BitOperations::UInt32BitsToSingle(resultBits); +} + +template <> +inline double EvaluateBinaryScalarSpecialized(genTreeOps oper, double arg0, double arg1) +{ + uint64_t arg0Bits = BitOperations::DoubleToUInt64Bits(arg0); + uint64_t arg1Bits = BitOperations::DoubleToUInt64Bits(arg1); + + uint64_t resultBits = EvaluateBinaryScalarSpecialized(oper, arg0Bits, arg1Bits); + return BitOperations::UInt64BitsToDouble(resultBits); +} + template TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1) { @@ -278,6 +424,16 @@ TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1) return arg0 + arg1; } + case GT_DIV: + { + return arg0 / arg1; + } + + case GT_MUL: + { + return arg0 * arg1; + } + case GT_SUB: { return arg0 - arg1; @@ -285,7 +441,7 @@ TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1) default: { - unreached(); + return EvaluateBinaryScalarSpecialized(oper, arg0, arg1); } } } @@ -395,6 +551,18 @@ void EvaluateBinarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd* } } +template +void BroadcastConstantToSimd(TSimd* result, TBase arg0) +{ + uint32_t count = sizeof(TSimd) / sizeof(TBase); + + for (uint32_t i = 0; i < count; i++) + { + // Safely execute `result[i] = arg0` + memcpy(&result->u8[i * sizeof(TBase)], &arg0, sizeof(TBase)); + } +} + #ifdef FEATURE_SIMD #ifdef TARGET_XARCH diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index bf5181be76a0a..93bcf7435c853 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -2645,6 +2645,22 @@ uint32_t BitOperations::BitScanReverse(uint64_t value) #endif } +//------------------------------------------------------------------------ +// BitOperations::DoubleToUInt64Bits: Gets the underlying bits for a double-precision floating-point value. 
+// +// Arguments: +// value - The number to convert +// +// Return Value: +// The underlying bits for value. +// +uint64_t BitOperations::DoubleToUInt64Bits(double value) +{ + uint64_t result; + memcpy(&result, &value, sizeof(double)); + return result; +} + //------------------------------------------------------------------------ // BitOperations::LeadingZeroCount: Count the number of leading zero bits in a mask. // @@ -2932,6 +2948,22 @@ uint64_t BitOperations::RotateRight(uint64_t value, uint32_t offset) return (value >> (offset & 0x3F)) | (value << ((64 - offset) & 0x3F)); } +//------------------------------------------------------------------------ +// BitOperations::SingleToUInt32Bits: Gets the underlying bits for a single-precision floating-point value. +// +// Arguments: +// value - The number to convert +// +// Return Value: +// The underlying bits for value. +// +uint32_t BitOperations::SingleToUInt32Bits(float value) +{ + uint32_t result; + memcpy(&result, &value, sizeof(float)); + return result; +} + //------------------------------------------------------------------------ // BitOperations::TrailingZeroCount: Count the number of trailing zero bits in an integer value. // @@ -2980,6 +3012,38 @@ uint32_t BitOperations::TrailingZeroCount(uint64_t value) #endif } +//------------------------------------------------------------------------ +// BitOperations::UInt32BitsToSingle: Gets a single-precision floating-point from its underlying bit value. +// +// Arguments: +// value - The underlying bit value. +// +// Return Value: +// The single-precision floating-point from value. +// +float BitOperations::UInt32BitsToSingle(uint32_t value) +{ + float result; + memcpy(&result, &value, sizeof(uint32_t)); + return result; +} + +//------------------------------------------------------------------------ +// BitOperations::UInt64BitsToDouble: Gets a double-precision floating-point from its underlying bit value. +// +// Arguments: +// value - The underlying bit value. 
+// +// Return Value: +// The double-precision floating-point from value. +// +double BitOperations::UInt64BitsToDouble(uint64_t value) +{ + double result; + memcpy(&result, &value, sizeof(uint64_t)); + return result; +} + namespace MagicDivide { template diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h index 6f661eef4d15c..d78000bfb7d26 100644 --- a/src/coreclr/jit/utils.h +++ b/src/coreclr/jit/utils.h @@ -751,6 +751,8 @@ class BitOperations static uint32_t BitScanReverse(uint64_t value); + static uint64_t DoubleToUInt64Bits(double value); + static uint32_t LeadingZeroCount(uint32_t value); static uint32_t LeadingZeroCount(uint64_t value); @@ -775,9 +777,15 @@ class BitOperations static uint64_t RotateRight(uint64_t value, uint32_t offset); + static uint32_t SingleToUInt32Bits(float value); + static uint32_t TrailingZeroCount(uint32_t value); static uint32_t TrailingZeroCount(uint64_t value); + + static float UInt32BitsToSingle(uint32_t value); + + static double UInt64BitsToDouble(uint64_t value); }; // The CLR requires that critical section locks be initialized via its ClrCreateCriticalSection API...but diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index c43bb47a36f8d..29603b28414e3 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -1999,11 +1999,195 @@ ValueNum ValueNumStore::VNOneForType(var_types typ) return VNForDoubleCon(1.0); default: + { + assert(!varTypeIsSIMD(typ)); + return NoVN; + } + } +} + +ValueNum ValueNumStore::VNAllBitsForType(var_types typ) +{ + switch (typ) + { + case TYP_INT: + case TYP_UINT: + { + return VNForIntCon(0xFFFFFFFF); + } + + case TYP_LONG: + case TYP_ULONG: + { + return VNForLongCon(0xFFFFFFFFFFFFFFFF); + } + +#ifdef FEATURE_SIMD + case TYP_SIMD8: + { + simd8_t cnsVal; + + cnsVal.u32[0] = 0xFFFFFFFF; + cnsVal.u32[1] = 0xFFFFFFFF; + + return VNForSimd8Con(cnsVal); + } + + case TYP_SIMD12: + { + simd12_t cnsVal; + + cnsVal.u32[0] = 0xFFFFFFFF; + cnsVal.u32[1] = 
0xFFFFFFFF; + cnsVal.u32[2] = 0xFFFFFFFF; + + return VNForSimd12Con(cnsVal); + } + + case TYP_SIMD16: + { + simd16_t cnsVal; + + cnsVal.u32[0] = 0xFFFFFFFF; + cnsVal.u32[1] = 0xFFFFFFFF; + cnsVal.u32[2] = 0xFFFFFFFF; + cnsVal.u32[3] = 0xFFFFFFFF; + + return VNForSimd16Con(cnsVal); + } + + case TYP_SIMD32: + { + simd32_t cnsVal; + + cnsVal.u32[0] = 0xFFFFFFFF; + cnsVal.u32[1] = 0xFFFFFFFF; + cnsVal.u32[2] = 0xFFFFFFFF; + cnsVal.u32[3] = 0xFFFFFFFF; + + cnsVal.u32[4] = 0xFFFFFFFF; + cnsVal.u32[5] = 0xFFFFFFFF; + cnsVal.u32[6] = 0xFFFFFFFF; + cnsVal.u32[7] = 0xFFFFFFFF; + + return VNForSimd32Con(cnsVal); + } +#endif // FEATURE_SIMD + + default: + { return NoVN; + } } } #ifdef FEATURE_SIMD +ValueNum ValueNumStore::VNOneForSimdType(var_types simdType, var_types simdBaseType) +{ + assert(varTypeIsSIMD(simdType)); + + simd32_t simd32Val = {}; + int simdSize = genTypeSize(simdType); + + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: + { + for (int i = 0; i < simdSize; i++) + { + simd32Val.u8[i] = 1; + } + break; + } + + case TYP_SHORT: + case TYP_USHORT: + { + for (int i = 0; i < (simdSize / 2); i++) + { + simd32Val.u16[i] = 1; + } + break; + } + + case TYP_INT: + case TYP_UINT: + { + for (int i = 0; i < (simdSize / 4); i++) + { + simd32Val.u32[i] = 1; + } + break; + } + + case TYP_LONG: + case TYP_ULONG: + { + for (int i = 0; i < (simdSize / 8); i++) + { + simd32Val.u64[i] = 1; + } + break; + } + + case TYP_FLOAT: + { + for (int i = 0; i < (simdSize / 4); i++) + { + simd32Val.f32[i] = 1.0f; + } + break; + } + + case TYP_DOUBLE: + { + for (int i = 0; i < (simdSize / 8); i++) + { + simd32Val.f64[i] = 1.0; + } + break; + } + + default: + { + unreached(); + } + } + + switch (simdType) + { + case TYP_SIMD8: + { + return VNForSimd8Con(simd32Val.v64[0]); + } + + case TYP_SIMD12: + { + assert(simdBaseType == TYP_FLOAT); + + simd12_t simd12Val; + memcpy(&simd12Val, &simd32Val.f32, sizeof(simd12_t)); + return VNForSimd12Con(simd12Val); + } + + case TYP_SIMD16: + { + 
return VNForSimd16Con(simd32Val.v128[0]); + } + + case TYP_SIMD32: + { + return VNForSimd32Con(simd32Val); + } + + default: + { + unreached(); + } + } +} + ValueNum ValueNumStore::VNForSimdType(unsigned simdSize, CorInfoType simdBaseJitType) { ValueNum baseTypeVN = VNForIntCon(INT32(simdBaseJitType)); @@ -4262,6 +4446,7 @@ ValueNum ValueNumStore::EvalUsingMathIdentity(var_types typ, VNFunc func, ValueN { // (0 * x) == 0 // (x * 0) == 0 + // This identity does not apply for floating-point (when x == -0.0, NaN, +Inf, -Inf) ValueNum ZeroVN = VNZeroForType(typ); if (arg0VN == ZeroVN) { @@ -4271,18 +4456,19 @@ ValueNum ValueNumStore::EvalUsingMathIdentity(var_types typ, VNFunc func, ValueN { return ZeroVN; } + } - // (x * 1) == x - // (1 * x) == x - ValueNum OneVN = VNOneForType(typ); - if (arg0VN == OneVN) - { - return arg1VN; - } - else if (arg1VN == OneVN) - { - return arg0VN; - } + // (x * 1) == x + // (1 * x) == x + // This is safe for all floats since we do not fault for sNaN + ValueNum OneVN = VNOneForType(typ); + if (arg0VN == OneVN) + { + return arg1VN; + } + else if (arg1VN == OneVN) + { + return arg0VN; } return NoVN; @@ -4293,6 +4479,7 @@ ValueNum ValueNumStore::EvalUsingMathIdentity(var_types typ, VNFunc func, ValueN { ValueNum ZeroVN; ValueNum OneVN; + ValueNum AllBitsVN; switch (genTreeOps(func)) { @@ -4310,53 +4497,124 @@ ValueNum ValueNumStore::EvalUsingMathIdentity(var_types typ, VNFunc func, ValueN case GT_DIV: case GT_UDIV: + { // (x / 1) == x - // This identity does not apply for floating point - // - if (!varTypeIsFloating(typ)) + // This is safe for all floats since we do not fault for sNaN + OneVN = VNOneForType(typ); + + if (arg1VN == OneVN) { - OneVN = VNOneForType(typ); - if (arg1VN == OneVN) - { - resultVN = arg0VN; - } + resultVN = arg0VN; } break; + } case GT_OR: + { + // (0 | x) == x + // (x | 0) == x + ZeroVN = VNZeroForType(typ); + if (arg0VN == ZeroVN) + { + resultVN = arg1VN; + break; + } + else if (arg1VN == ZeroVN) + { + resultVN = 
arg0VN; + break; + } + + // (x | ~0) == ~0 + // (~0 | x) == ~0 + AllBitsVN = VNAllBitsForType(typ); + if (arg0VN == AllBitsVN) + { + resultVN = AllBitsVN; + break; + } + else if (arg1VN == AllBitsVN) + { + resultVN = AllBitsVN; + break; + } + + // x | x == x + if (arg0VN == arg1VN) + { + resultVN = arg0VN; + } + break; + } + case GT_XOR: - // (0 | x) == x, (0 ^ x) == x - // (x | 0) == x, (x ^ 0) == x + { + // (0 ^ x) == x + // (x ^ 0) == x ZeroVN = VNZeroForType(typ); if (arg0VN == ZeroVN) { resultVN = arg1VN; + break; } else if (arg1VN == ZeroVN) { resultVN = arg0VN; + break; + } + + // x ^ x == 0 + if (arg0VN == arg1VN) + { + resultVN = ZeroVN; } break; + } case GT_AND: + { // (x & 0) == 0 // (0 & x) == 0 ZeroVN = VNZeroForType(typ); if (arg0VN == ZeroVN) { resultVN = ZeroVN; + break; } else if (arg1VN == ZeroVN) { resultVN = ZeroVN; + break; + } + + // (x & ~0) == x + // (~0 & x) == x + AllBitsVN = VNAllBitsForType(typ); + if (arg0VN == AllBitsVN) + { + resultVN = arg1VN; + break; + } + else if (arg1VN == AllBitsVN) + { + resultVN = arg0VN; + break; + } + + // x & x == x + if (arg0VN == arg1VN) + { + resultVN = arg0VN; } break; + } case GT_LSH: case GT_RSH: case GT_RSZ: case GT_ROL: case GT_ROR: + { // (x << 0) == x // (x >> 0) == x // (x rol 0) == x @@ -4366,6 +4624,7 @@ ValueNum ValueNumStore::EvalUsingMathIdentity(var_types typ, VNFunc func, ValueN { resultVN = arg0VN; } + // (0 << x) == 0 // (0 >> x) == 0 // (0 rol x) == 0 @@ -4375,6 +4634,7 @@ ValueNum ValueNumStore::EvalUsingMathIdentity(var_types typ, VNFunc func, ValueN resultVN = ZeroVN; } break; + } case GT_EQ: // (null == non-null) == false @@ -6012,37 +6272,60 @@ void ValueNumStore::SetVNIsCheckedBound(ValueNum vn) } #ifdef FEATURE_HW_INTRINSICS -ValueNum EvaluateUnarySimd( - ValueNumStore* vns, genTreeOps oper, bool scalar, var_types simdType, var_types baseType, ValueNum arg0VN) +template +TSimd BroadcastConstantToSimd(ValueNumStore* vns, var_types baseType, ValueNum argVN) { - switch (simdType) + 
assert(vns->IsVNConstant(argVN)); + assert(!varTypeIsSIMD(vns->TypeOfVN(argVN))); + + TSimd result = {}; + + switch (baseType) { - case TYP_SIMD8: + case TYP_FLOAT: { - simd8_t result = {}; - EvaluateUnarySimd(oper, scalar, baseType, &result, vns->GetConstantSimd8(arg0VN)); - return vns->VNForSimd8Con(result); + float arg = vns->GetConstantSingle(argVN); + BroadcastConstantToSimd(&result, arg); + break; } - case TYP_SIMD12: + case TYP_DOUBLE: { - simd12_t result = {}; - EvaluateUnarySimd(oper, scalar, baseType, &result, vns->GetConstantSimd12(arg0VN)); - return vns->VNForSimd12Con(result); + double arg = vns->GetConstantDouble(argVN); + BroadcastConstantToSimd(&result, arg); + break; } - case TYP_SIMD16: + case TYP_BYTE: + case TYP_UBYTE: { - simd16_t result = {}; - EvaluateUnarySimd(oper, scalar, baseType, &result, vns->GetConstantSimd16(arg0VN)); - return vns->VNForSimd16Con(result); + uint8_t arg = static_cast(vns->GetConstantInt32(argVN)); + BroadcastConstantToSimd(&result, arg); + break; } - case TYP_SIMD32: + case TYP_SHORT: + case TYP_USHORT: { - simd32_t result = {}; - EvaluateUnarySimd(oper, scalar, baseType, &result, vns->GetConstantSimd32(arg0VN)); - return vns->VNForSimd32Con(result); + uint16_t arg = static_cast(vns->GetConstantInt32(argVN)); + BroadcastConstantToSimd(&result, arg); + break; + } + + case TYP_INT: + case TYP_UINT: + { + uint32_t arg = static_cast(vns->GetConstantInt32(argVN)); + BroadcastConstantToSimd(&result, arg); + break; + } + + case TYP_LONG: + case TYP_ULONG: + { + uint64_t arg = static_cast(vns->GetConstantInt64(argVN)); + BroadcastConstantToSimd(&result, arg); + break; } default: @@ -6050,47 +6333,153 @@ ValueNum EvaluateUnarySimd( unreached(); } } + + return result; } -ValueNum EvaluateBinarySimd(ValueNumStore* vns, - genTreeOps oper, - bool scalar, - var_types simdType, - var_types baseType, - ValueNum arg0VN, - ValueNum arg1VN) +simd8_t GetConstantSimd8(ValueNumStore* vns, var_types baseType, ValueNum argVN) +{ + 
assert(vns->IsVNConstant(argVN)); + + if (vns->TypeOfVN(argVN) == TYP_SIMD8) + { + return vns->GetConstantSimd8(argVN); + } + + return BroadcastConstantToSimd(vns, baseType, argVN); +} + +simd12_t GetConstantSimd12(ValueNumStore* vns, var_types baseType, ValueNum argVN) +{ + assert(vns->IsVNConstant(argVN)); + + if (vns->TypeOfVN(argVN) == TYP_SIMD12) + { + return vns->GetConstantSimd12(argVN); + } + + return BroadcastConstantToSimd(vns, baseType, argVN); +} + +simd16_t GetConstantSimd16(ValueNumStore* vns, var_types baseType, ValueNum argVN) +{ + assert(vns->IsVNConstant(argVN)); + + if (vns->TypeOfVN(argVN) == TYP_SIMD16) + { + return vns->GetConstantSimd16(argVN); + } + + return BroadcastConstantToSimd(vns, baseType, argVN); +} + +simd32_t GetConstantSimd32(ValueNumStore* vns, var_types baseType, ValueNum argVN) +{ + assert(vns->IsVNConstant(argVN)); + + if (vns->TypeOfVN(argVN) == TYP_SIMD32) + { + return vns->GetConstantSimd32(argVN); + } + + return BroadcastConstantToSimd(vns, baseType, argVN); +} + +ValueNum EvaluateUnarySimd( + ValueNumStore* vns, genTreeOps oper, bool scalar, var_types simdType, var_types baseType, ValueNum arg0VN) +{ + switch (simdType) + { + case TYP_SIMD8: + { + simd8_t arg0 = GetConstantSimd8(vns, baseType, arg0VN); + + simd8_t result = {}; + EvaluateUnarySimd(oper, scalar, baseType, &result, arg0); + return vns->VNForSimd8Con(result); + } + + case TYP_SIMD12: + { + simd12_t arg0 = GetConstantSimd12(vns, baseType, arg0VN); + + simd12_t result = {}; + EvaluateUnarySimd(oper, scalar, baseType, &result, arg0); + return vns->VNForSimd12Con(result); + } + + case TYP_SIMD16: + { + simd16_t arg0 = GetConstantSimd16(vns, baseType, arg0VN); + + simd16_t result = {}; + EvaluateUnarySimd(oper, scalar, baseType, &result, arg0); + return vns->VNForSimd16Con(result); + } + + case TYP_SIMD32: + { + simd32_t arg0 = GetConstantSimd32(vns, baseType, arg0VN); + + simd32_t result = {}; + EvaluateUnarySimd(oper, scalar, baseType, &result, arg0); + return 
vns->VNForSimd32Con(result); + } + + default: + { + unreached(); + } + } +} + +ValueNum EvaluateBinarySimd(ValueNumStore* vns, + genTreeOps oper, + bool scalar, + var_types simdType, + var_types baseType, + ValueNum arg0VN, + ValueNum arg1VN) { switch (simdType) { case TYP_SIMD8: { + simd8_t arg0 = GetConstantSimd8(vns, baseType, arg0VN); + simd8_t arg1 = GetConstantSimd8(vns, baseType, arg1VN); + simd8_t result = {}; - EvaluateBinarySimd(oper, scalar, baseType, &result, vns->GetConstantSimd8(arg0VN), - vns->GetConstantSimd8(arg1VN)); + EvaluateBinarySimd(oper, scalar, baseType, &result, arg0, arg1); return vns->VNForSimd8Con(result); } case TYP_SIMD12: { + simd12_t arg0 = GetConstantSimd12(vns, baseType, arg0VN); + simd12_t arg1 = GetConstantSimd12(vns, baseType, arg1VN); + simd12_t result = {}; - EvaluateBinarySimd(oper, scalar, baseType, &result, vns->GetConstantSimd12(arg0VN), - vns->GetConstantSimd12(arg1VN)); + EvaluateBinarySimd(oper, scalar, baseType, &result, arg0, arg1); return vns->VNForSimd12Con(result); } case TYP_SIMD16: { + simd16_t arg0 = GetConstantSimd16(vns, baseType, arg0VN); + simd16_t arg1 = GetConstantSimd16(vns, baseType, arg1VN); + simd16_t result = {}; - EvaluateBinarySimd(oper, scalar, baseType, &result, vns->GetConstantSimd16(arg0VN), - vns->GetConstantSimd16(arg1VN)); + EvaluateBinarySimd(oper, scalar, baseType, &result, arg0, arg1); return vns->VNForSimd16Con(result); } case TYP_SIMD32: { + simd32_t arg0 = GetConstantSimd32(vns, baseType, arg0VN); + simd32_t arg1 = GetConstantSimd32(vns, baseType, arg1VN); + simd32_t result = {}; - EvaluateBinarySimd(oper, scalar, baseType, &result, vns->GetConstantSimd32(arg0VN), - vns->GetConstantSimd32(arg1VN)); + EvaluateBinarySimd(oper, scalar, baseType, &result, arg0, arg1); return vns->VNForSimd32Con(result); } @@ -6173,6 +6562,37 @@ ValueNum EvaluateSimdGetElement(ValueNumStore* vns, var_types baseType, TSimd ar } } +ValueNum EvaluateSimdGetElement(ValueNumStore* vns, var_types type, var_types 
baseType, ValueNum arg0VN, int arg1) +{ + switch (vns->TypeOfVN(arg0VN)) + { + case TYP_SIMD8: + { + return EvaluateSimdGetElement(vns, baseType, vns->GetConstantSimd8(arg0VN), arg1); + } + + case TYP_SIMD12: + { + return EvaluateSimdGetElement(vns, baseType, vns->GetConstantSimd12(arg0VN), arg1); + } + + case TYP_SIMD16: + { + return EvaluateSimdGetElement(vns, baseType, vns->GetConstantSimd16(arg0VN), arg1); + } + + case TYP_SIMD32: + { + return EvaluateSimdGetElement(vns, baseType, vns->GetConstantSimd32(arg0VN), arg1); + } + + default: + { + unreached(); + } + } +} + ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(var_types type, var_types baseType, NamedIntrinsic ni, @@ -6253,6 +6673,11 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(var_types type, { return EvaluateUnarySimd(this, GT_NEG, /* scalar */ true, type, baseType, arg0VN); } + + case NI_AdvSimd_Not: + { + return EvaluateUnarySimd(this, GT_NOT, /* scalar */ false, type, baseType, arg0VN); + } #endif // TARGET_ARM64 #if defined(TARGET_XARCH) @@ -6430,46 +6855,161 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, } #ifdef TARGET_ARM64 - case NI_Vector64_GetElement: + case NI_AdvSimd_And: +#else + case NI_SSE_And: + case NI_SSE2_And: + case NI_AVX_And: + case NI_AVX2_And: +#endif + { + return EvaluateBinarySimd(this, GT_AND, /* scalar */ false, type, baseType, arg0VN, arg1VN); + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_BitwiseClear: + { + return EvaluateBinarySimd(this, GT_AND_NOT, /* scalar */ false, type, baseType, arg0VN, arg1VN); + } +#else + case NI_SSE_AndNot: + case NI_SSE2_AndNot: + case NI_AVX_AndNot: + case NI_AVX2_AndNot: + { + // xarch does: ~arg0VN & arg1VN + return EvaluateBinarySimd(this, GT_AND_NOT, /* scalar */ false, type, baseType, arg1VN, arg0VN); + } +#endif + +#ifdef TARGET_ARM64 + case NI_AdvSimd_Arm64_Divide: +#else + case NI_SSE_Divide: + case NI_SSE2_Divide: + case NI_AVX_Divide: #endif + { + return EvaluateBinarySimd(this, GT_DIV, /* scalar */ false, 
type, baseType, arg0VN, arg1VN); + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_DivideScalar: +#else + case NI_SSE_DivideScalar: + case NI_SSE2_DivideScalar: +#endif + { + return EvaluateBinarySimd(this, GT_DIV, /* scalar */ true, type, baseType, arg0VN, arg1VN); + } + case NI_Vector128_GetElement: -#ifdef TARGET_XARCH +#ifdef TARGET_ARM64 + case NI_Vector64_GetElement: +#else case NI_Vector256_GetElement: #endif { - switch (TypeOfVN(arg0VN)) - { - case TYP_SIMD8: - { - return EvaluateSimdGetElement(this, baseType, GetConstantSimd8(arg0VN), - GetConstantInt32(arg1VN)); - } + return EvaluateSimdGetElement(this, type, baseType, arg0VN, GetConstantInt32(arg1VN)); + } - case TYP_SIMD12: - { - return EvaluateSimdGetElement(this, baseType, GetConstantSimd12(arg0VN), - GetConstantInt32(arg1VN)); - } +#ifdef TARGET_ARM64 + case NI_AdvSimd_MultiplyByScalar: + case NI_AdvSimd_Arm64_MultiplyByScalar: + { + // MultiplyByScalar takes a vector as the second operand but only utilizes element 0 + // We need to extract it and then functionally broadcast it up for the evaluation to + // work as expected. 
- case TYP_SIMD16: - { - return EvaluateSimdGetElement(this, baseType, GetConstantSimd16(arg0VN), - GetConstantInt32(arg1VN)); - } + arg1VN = EvaluateSimdGetElement(this, type, baseType, arg1VN, 0); + FALLTHROUGH; + } +#endif - case TYP_SIMD32: - { - return EvaluateSimdGetElement(this, baseType, GetConstantSimd32(arg0VN), - GetConstantInt32(arg1VN)); - } +#ifdef TARGET_ARM64 + case NI_AdvSimd_Multiply: + case NI_AdvSimd_Arm64_Multiply: +#else + case NI_SSE_Multiply: + case NI_SSE2_Multiply: + case NI_SSE2_MultiplyLow: + case NI_SSE41_MultiplyLow: + case NI_AVX_Multiply: + case NI_AVX2_MultiplyLow: +#endif + { + return EvaluateBinarySimd(this, GT_MUL, /* scalar */ false, type, baseType, arg0VN, arg1VN); + } - default: - { - unreached(); - } - } +#ifdef TARGET_ARM64 + case NI_AdvSimd_MultiplyScalar: +#else + case NI_SSE_MultiplyScalar: + case NI_SSE2_MultiplyScalar: +#endif + { + return EvaluateBinarySimd(this, GT_MUL, /* scalar */ true, type, baseType, arg0VN, arg1VN); + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_Or: +#else + case NI_SSE_Or: + case NI_SSE2_Or: + case NI_AVX_Or: + case NI_AVX2_Or: +#endif + { + return EvaluateBinarySimd(this, GT_OR, /* scalar */ false, type, baseType, arg0VN, arg1VN); + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_ShiftLeftLogical: +#else + case NI_SSE2_ShiftLeftLogical: + case NI_AVX2_ShiftLeftLogical: +#endif + { + return EvaluateBinarySimd(this, GT_LSH, /* scalar */ false, type, baseType, arg0VN, arg1VN); + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_ShiftRightArithmetic: +#else + case NI_SSE2_ShiftRightArithmetic: + case NI_AVX2_ShiftRightArithmetic: +#endif + { + return EvaluateBinarySimd(this, GT_RSH, /* scalar */ false, type, baseType, arg0VN, arg1VN); + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_ShiftRightLogical: +#else + case NI_SSE2_ShiftRightLogical: + case NI_AVX2_ShiftRightLogical: +#endif + { + return EvaluateBinarySimd(this, GT_RSZ, /* scalar */ false, type, baseType, arg0VN, arg1VN); } +#ifdef TARGET_ARM64 + case 
NI_AdvSimd_ShiftLeftLogicalScalar: + { + return EvaluateBinarySimd(this, GT_LSH, /* scalar */ true, type, baseType, arg0VN, arg1VN); + } + + case NI_AdvSimd_ShiftRightArithmeticScalar: + { + return EvaluateBinarySimd(this, GT_RSH, /* scalar */ true, type, baseType, arg0VN, arg1VN); + } + + case NI_AdvSimd_ShiftRightLogicalScalar: + { + return EvaluateBinarySimd(this, GT_RSZ, /* scalar */ true, type, baseType, arg0VN, arg1VN); + } +#endif // TARGET_ARM64 + #ifdef TARGET_ARM64 case NI_AdvSimd_Subtract: case NI_AdvSimd_Arm64_Subtract: @@ -6493,6 +7033,18 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, return EvaluateBinarySimd(this, GT_SUB, /* scalar */ true, type, baseType, arg0VN, arg1VN); } +#ifdef TARGET_ARM64 + case NI_AdvSimd_Xor: +#else + case NI_SSE_Xor: + case NI_SSE2_Xor: + case NI_AVX_Xor: + case NI_AVX2_Xor: +#endif + { + return EvaluateBinarySimd(this, GT_XOR, /* scalar */ false, type, baseType, arg0VN, arg1VN); + } + default: break; } @@ -6511,14 +7063,213 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_AVX2_Add: #endif { - // Handle `x + 0` and `0 + x` + if (varTypeIsFloating(baseType)) + { + // Not safe for floating-point when x == -0.0 + break; + } + + // Handle `x + 0 == x` and `0 + x == x` + ValueNum zeroVN = VNZeroForType(type); + + if (cnsVN == zeroVN) + { + return argVN; + } + break; + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_And: +#else + case NI_SSE_And: + case NI_SSE2_And: + case NI_AVX_And: + case NI_AVX2_And: +#endif + { + // Handle `x & 0 == 0` and `0 & x == 0` + ValueNum zeroVN = VNZeroForType(type); + + if (cnsVN == zeroVN) + { + return zeroVN; + } + + // Handle `x & ~0 == x` and `~0 & x == x` + ValueNum allBitsVN = VNAllBitsForType(type); + + if (cnsVN == allBitsVN) + { + return argVN; + } + break; + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_BitwiseClear: +#else + case NI_SSE_AndNot: + case NI_SSE2_AndNot: + case NI_AVX_AndNot: + case NI_AVX2_AndNot: +#endif + { +#ifdef TARGET_ARM64 + if (cnsVN 
== arg0VN) + { + // arm64 preserves the args, so we can only handle `x & ~cns` + break; + } +#else + if (cnsVN == arg1VN) + { + // xarch swaps the args, so we can only handle `~cns & x` + break; + } +#endif + + // Handle `x & ~0 == x` (the constant is the negated operand) + ValueNum zeroVN = VNZeroForType(type); + + if (cnsVN == zeroVN) + { + return argVN; + } + + // Handle `x & ~AllBitsSet == 0` (the constant is the negated operand) + ValueNum allBitsVN = VNAllBitsForType(type); + + if (cnsVN == allBitsVN) + { + return zeroVN; + } + break; + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_Arm64_Divide: +#else + case NI_SSE_Divide: + case NI_SSE2_Divide: + case NI_AVX_Divide: +#endif + { + // Handle `x / 1 == x`. + // This is safe for all floats since we do not fault for sNaN + ValueNum oneVN; + + if (varTypeIsSIMD(TypeOfVN(arg1VN))) + { + oneVN = VNOneForSimdType(type, baseType); + } + else + { + oneVN = VNOneForType(baseType); + } + + if (arg1VN == oneVN) + { + return arg0VN; + } + break; + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_Multiply: + case NI_AdvSimd_MultiplyByScalar: + case NI_AdvSimd_Arm64_Multiply: + case NI_AdvSimd_Arm64_MultiplyByScalar: +#else + case NI_SSE_Multiply: + case NI_SSE2_Multiply: + case NI_SSE2_MultiplyLow: + case NI_SSE41_MultiplyLow: + case NI_AVX_Multiply: + case NI_AVX2_MultiplyLow: +#endif + { + if (!varTypeIsFloating(baseType)) + { + // Handle `x * 0 == 0` and `0 * x == 0` + // Not safe for floating-point when x == -0.0, NaN, +Inf, -Inf + ValueNum zeroVN = VNZeroForType(TypeOfVN(cnsVN)); + + if (cnsVN == zeroVN) + { + return zeroVN; + } + } + // Handle `x * 1 == x` and `1 * x == x` + // This is safe for all floats since we do not fault for sNaN + ValueNum oneVN; + + if (varTypeIsSIMD(TypeOfVN(cnsVN))) + { + oneVN = VNOneForSimdType(type, baseType); + } + else + { + oneVN = VNOneForType(baseType); + } + + if (cnsVN == oneVN) + { + return argVN; + } + break; + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_Or: +#else + case NI_SSE_Or: + case NI_SSE2_Or: + case NI_AVX_Or: + case NI_AVX2_Or: +#endif + { + // Handle `x 
| 0 == x` and `0 | x == x` ValueNum zeroVN = VNZeroForType(type); if (cnsVN == zeroVN) { return argVN; } + + // Handle `x | ~0 == ~0` and `~0 | x == ~0` + ValueNum allBitsVN = VNAllBitsForType(type); + + if (cnsVN == allBitsVN) + { + return allBitsVN; + } + break; + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_ShiftLeftLogical: + case NI_AdvSimd_ShiftRightArithmetic: + case NI_AdvSimd_ShiftRightLogical: +#else + case NI_SSE2_ShiftLeftLogical: + case NI_SSE2_ShiftRightArithmetic: + case NI_SSE2_ShiftRightLogical: + case NI_AVX2_ShiftLeftLogical: + case NI_AVX2_ShiftRightArithmetic: + case NI_AVX2_ShiftRightLogical: +#endif + { + // Handle `x << 0 == x` and `0 << x == 0` + // Handle `x >> 0 == x` and `0 >> x == 0` + // Handle `x >>> 0 == x` and `0 >>> x == 0` + ValueNum zeroVN = VNZeroForType(TypeOfVN(cnsVN)); + + if (cnsVN == zeroVN) + { + return (cnsVN == arg1VN) ? argVN : zeroVN; + } break; } @@ -6532,14 +7283,32 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_AVX2_Subtract: #endif { - // Handle `x - 0` - - if (cnsVN != arg1VN) + if (varTypeIsFloating(baseType)) { - // This is `0 - x` which is `NEG(x)` + // Not safe for floating-point when x == -0.0 break; } + // Handle `x - 0 == x` + ValueNum zeroVN = VNZeroForType(type); + + if (arg1VN == zeroVN) + { + return argVN; + } + break; + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_Xor: +#else + case NI_SSE_Xor: + case NI_SSE2_Xor: + case NI_AVX_Xor: + case NI_AVX2_Xor: +#endif + { + // Handle `x ^ 0 == x` and `0 ^ x == x` ValueNum zeroVN = VNZeroForType(type); if (cnsVN == zeroVN) @@ -6553,6 +7322,86 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, break; } } + else if (arg0VN == arg1VN) + { + switch (ni) + { +#ifdef TARGET_ARM64 + case NI_AdvSimd_And: +#else + case NI_SSE_And: + case NI_SSE2_And: + case NI_AVX_And: + case NI_AVX2_And: +#endif + { + // Handle `x & x == x` + return arg0VN; + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_BitwiseClear: +#else + case NI_SSE_AndNot: + 
case NI_SSE2_AndNot: + case NI_AVX_AndNot: + case NI_AVX2_AndNot: +#endif + { + // Handle `x & ~x == 0` + return VNZeroForType(type); + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_Or: +#else + case NI_SSE_Or: + case NI_SSE2_Or: + case NI_AVX_Or: + case NI_AVX2_Or: +#endif + { + // Handle `x | x == x` + return arg0VN; + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_Subtract: + case NI_AdvSimd_Arm64_Subtract: +#else + case NI_SSE_Subtract: + case NI_SSE2_Subtract: + case NI_AVX_Subtract: + case NI_AVX2_Subtract: +#endif + { + if (varTypeIsFloating(baseType)) + { + // Not safe for floating-point when x == -0.0, NaN, +Inf, -Inf + break; + } + + // Handle `x - x == 0` + return VNZeroForType(type); + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_Xor: +#else + case NI_SSE_Xor: + case NI_SSE2_Xor: + case NI_AVX_Xor: + case NI_AVX2_Xor: +#endif + { + // Handle `x ^ x == 0` (the result is zero, not x) + return VNZeroForType(type); + } + + default: + break; + } + } if (encodeResultType) { diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index 5a122070ffbbf..b3dcc252a1332 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -341,6 +341,7 @@ class ValueNumStore template static bool IsIntZero(T v); +public: // Given an constant value number return its value. int GetConstantInt32(ValueNum argVN); INT64 GetConstantInt64(ValueNum argVN); @@ -348,15 +349,13 @@ class ValueNumStore float GetConstantSingle(ValueNum argVN); #if defined(FEATURE_SIMD) -public: simd8_t GetConstantSimd8(ValueNum argVN); simd12_t GetConstantSimd12(ValueNum argVN); simd16_t GetConstantSimd16(ValueNum argVN); simd32_t GetConstantSimd32(ValueNum argVN); - -private: #endif // FEATURE_SIMD +private: // Assumes that all the ValueNum arguments of each of these functions have been shown to represent constants. // Assumes that "vnf" is a operator of the appropriate arity (unary for the first, binary for the second). // Assume that "CanEvalForConstantArgs(vnf)" is true. 
@@ -521,7 +520,14 @@ class ValueNumStore // It returns NoVN for a "typ" that has no one value, such as TYP_REF. ValueNum VNOneForType(var_types typ); + // Returns the value number for AllBitsSet of the given "typ". + // It has an unreached() for a "typ" that has no all bits set value, such as TYP_VOID. + ValueNum VNAllBitsForType(var_types typ); + #ifdef FEATURE_SIMD + // Returns the value number for one of the given "simdType" and "simdBaseType". + ValueNum VNOneForSimdType(var_types simdType, var_types simdBaseType); + // A helper function for constructing VNF_SimdType VNs. ValueNum VNForSimdType(unsigned simdSize, CorInfoType simdBaseJitType); #endif // FEATURE_SIMD diff --git a/src/tests/JIT/HardwareIntrinsics/General/ConstantFolding/SimdConstantFoldings.cs b/src/tests/JIT/HardwareIntrinsics/General/ConstantFolding/SimdConstantFoldings.cs index 56aaec012146b..8ba3b863e2dee 100644 --- a/src/tests/JIT/HardwareIntrinsics/General/ConstantFolding/SimdConstantFoldings.cs +++ b/src/tests/JIT/HardwareIntrinsics/General/ConstantFolding/SimdConstantFoldings.cs @@ -152,15 +152,15 @@ public static void SubtractTests() ); Assert.Equal( - Vector128.Create((sbyte)(+0), -3, +0, +0, +00, +00, +00, +00, +00, +00, +00, +00, +00, +00, +00, +00), - Vector128.Create((sbyte)(+1), -2, +3, -4, +05, -06, +07, -08, +09, -10, +11, -12, +13, -14, +15, -16) - - Vector128.Create((sbyte)(+1), +1, +3, -4, +05, -06, +07, -08, +09, -10, +11, -12, +13, -14, +15, -16) + Vector128.Create((sbyte)(+0), -3, +0, +0, +0, +0, +0, +0, +0, +00, +00, +00, +00, +00, +00, +00), + Vector128.Create((sbyte)(+1), -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16) + - Vector128.Create((sbyte)(+1), +1, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16) ); Assert.Equal( - Vector128.Create((short)(+0), -3, +0, +0, +00, +00, +00, +00), - Vector128.Create((short)(+1), -2, +3, -4, +05, -06, +07, -08) - - Vector128.Create((short)(+1), +1, +3, -4, +05, -06, +07, -08) + 
Vector128.Create((short)(+0), -3, +0, +0, +0, +0, +0, +0), + Vector128.Create((short)(+1), -2, +3, -4, +5, -6, +7, -8) + - Vector128.Create((short)(+1), +1, +3, -4, +5, -6, +7, -8) ); Assert.Equal( @@ -241,4 +241,496 @@ public static void GetElementTests() Vector128.Create((double)(+1), -2).GetElement(1) ); } + + [Fact] + public static void NotTests() + { + Assert.Equal( + Vector128.Create((byte)(0xFE), 0x01, 0xFC, 0x03, 0xFA, 0x05, 0xF8, 0x07, 0xF6, 0x09, 0xF4, 0x0B, 0xF2, 0x0D, 0xF0, 0x0F), + ~Vector128.Create((byte)(0x01), 0xFE, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0) + ); + + Assert.Equal( + Vector128.Create((ushort)(0xFFFE), 0x0001, 0xFFFC, 0x0003, 0xFFFA, 0x0005, 0xFFF8, 0x0007), + ~Vector128.Create((ushort)(0x0001), 0xFFFE, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8) + ); + + Assert.Equal( + Vector128.Create((uint)(0xFFFF_FFFE), 0x0000_0001, 0xFFFF_FFFC, 0x0000_0003), + ~Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFE, 0x0000_0003, 0xFFFF_FFFC) + ); + + Assert.Equal( + Vector128.Create((ulong)(0xFFFF_FFFF_FFFF_FFFE), 0x0000_0000_0000_0001), + ~Vector128.Create((ulong)(0x0000_0000_0000_0001), 0xFFFF_FFFF_FFFF_FFFE) + ); + + Assert.Equal( + Vector128.Create((sbyte)(-2), +1, -4, +3, -6, +5, -8, +7, -10, +9, -12, +11, -14, +13, -16, +15), + ~Vector128.Create((sbyte)(+1), -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16) + ); + + Assert.Equal( + Vector128.Create((short)(-2), +1, -4, +3, -6, +5, -8, +7), + ~Vector128.Create((short)(+1), -2, +3, -4, +5, -6, +7, -8) + ); + + Assert.Equal( + Vector128.Create((int)(-2), +1, -4, +3), + ~Vector128.Create((int)(+1), -2, +3, -4) + ); + + Assert.Equal( + Vector128.Create((long)(-2), +1), + ~Vector128.Create((long)(+1), -2) + ); + + Assert.Equal( + Vector128.Create((float)(-3.9999998f), +1.9999999f, -1.4999999f, +0.99999994f), + ~Vector128.Create((float)(+1), -2, +3, -4) + ); + + Assert.Equal( + Vector128.Create((double)(-3.9999999999999996), 
+1.9999999999999998), + ~Vector128.Create((double)(+1), -2) + ); + } + + [Fact] + public static void AndTests() + { + Assert.Equal( + Vector128.Create((byte)(0x01), 0x00, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0), + Vector128.Create((byte)(0x01), 0xFE, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0) + & Vector128.Create((byte)(0x01), 0x01, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0) + ); + + Assert.Equal( + Vector128.Create((ushort)(0x0001), 0x0000, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8), + Vector128.Create((ushort)(0x0001), 0xFFFE, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8) + & Vector128.Create((ushort)(0x0001), 0x0001, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8) + ); + + Assert.Equal( + Vector128.Create((uint)(0x0000_0001), 0x0000_0000, 0x0000_0003, 0xFFFF_FFFC), + Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFE, 0x0000_0003, 0xFFFF_FFFC) + & Vector128.Create((uint)(0x0000_0001), 0x0000_0001, 0x0000_0003, 0xFFFF_FFFC) + ); + + Assert.Equal( + Vector128.Create((ulong)(0x0000_0000_0000_0001), 0x0000_0000_0000_0000), + Vector128.Create((ulong)(0x0000_0000_0000_0001), 0xFFFF_FFFF_FFFF_FFFE) + & Vector128.Create((ulong)(0x0000_0000_0000_0001), 0x0000_0000_0000_0001) + ); + + Assert.Equal( + Vector128.Create((sbyte)(+1), +0, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16), + Vector128.Create((sbyte)(+1), -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16) + & Vector128.Create((sbyte)(+1), +1, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16) + ); + + Assert.Equal( + Vector128.Create((short)(+1), +0, +3, -4, +5, -6, +7, -8), + Vector128.Create((short)(+1), -2, +3, -4, +5, -6, +7, -8) + & Vector128.Create((short)(+1), +1, +3, -4, +5, -6, +7, -8) + ); + + Assert.Equal( + Vector128.Create((int)(+1), +0, +3, -4), + Vector128.Create((int)(+1), -2, +3, -4) + & Vector128.Create((int)(+1), +1, +3, 
-4) + ); + + Assert.Equal( + Vector128.Create((long)(+1), +0), + Vector128.Create((long)(+1), -2) + & Vector128.Create((long)(+1), +1) + ); + + Assert.Equal( + Vector128.Create((float)(+1), +0, +3, -4), + Vector128.Create((float)(+1), -2, +3, -4) + & Vector128.Create((float)(+1), +1, +3, -4) + ); + + Assert.Equal( + Vector128.Create((double)(+1), +0), + Vector128.Create((double)(+1), -2) + & Vector128.Create((double)(+1), +1) + ); + } + + [Fact] + public static void AndNotTests() + { + Assert.Equal( + Vector128.Create((byte)(0x00), 0xFE, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), + Vector128.AndNot( + Vector128.Create((byte)(0x01), 0xFE, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0), + Vector128.Create((byte)(0x01), 0x01, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0) + ) + ); + + Assert.Equal( + Vector128.Create((ushort)(0x0000), 0xFFFE, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000), + Vector128.AndNot( + Vector128.Create((ushort)(0x0001), 0xFFFE, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8), + Vector128.Create((ushort)(0x0001), 0x0001, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8) + ) + ); + + Assert.Equal( + Vector128.Create((uint)(0x0000_0000), 0xFFFF_FFFE, 0x0000_0000, 0x0000_0000), + Vector128.AndNot( + Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFE, 0x0000_0003, 0xFFFF_FFFC), + Vector128.Create((uint)(0x0000_0001), 0x0000_0001, 0x0000_0003, 0xFFFF_FFFC) + ) + ); + + Assert.Equal( + Vector128.Create((ulong)(0x0000_0000_0000_0000), 0xFFFF_FFFF_FFFF_FFFE), + Vector128.AndNot( + Vector128.Create((ulong)(0x0000_0000_0000_0001), 0xFFFF_FFFF_FFFF_FFFE), + Vector128.Create((ulong)(0x0000_0000_0000_0001), 0x0000_0000_0000_0001) + ) + ); + + Assert.Equal( + Vector128.Create((sbyte)(+0), -2, +0, +0, +0, +0, +0, +0, +0, +00, +00, +00, +00, +00, +00, +00), + Vector128.AndNot( + Vector128.Create((sbyte)(+1), -2, +3, -4, +5, -6, +7, -8, +9, -10, 
+11, -12, +13, -14, +15, -16), + Vector128.Create((sbyte)(+1), +1, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16) + ) + ); + + Assert.Equal( + Vector128.Create((short)(+0), -2, +0, +0, +0, +0, +0, +0), + Vector128.AndNot( + Vector128.Create((short)(+1), -2, +3, -4, +5, -6, +7, -8), + Vector128.Create((short)(+1), +1, +3, -4, +5, -6, +7, -8) + ) + ); + + Assert.Equal( + Vector128.Create((int)(+0), -2, +0, +0), + Vector128.AndNot( + Vector128.Create((int)(+1), -2, +3, -4), + Vector128.Create((int)(+1), +1, +3, -4) + ) + ); + + Assert.Equal( + Vector128.Create((long)(+0), -2), + Vector128.AndNot( + Vector128.Create((long)(+1), -2), + Vector128.Create((long)(+1), +1) + ) + ); + + Assert.Equal( + Vector128.Create((float)(+0), -2, +0, +0), + Vector128.AndNot( + Vector128.Create((float)(+1), -2, +3, -4), + Vector128.Create((float)(+1), +1, +3, -4) + ) + ); + + Assert.Equal( + Vector128.Create((double)(+0), -2), + Vector128.AndNot( + Vector128.Create((double)(+1), -2), + Vector128.Create((double)(+1), +1) + ) + ); + } + + [Fact] + public static void LeftShiftTests() + { + Assert.Equal( + Vector128.Create((byte)(0x02), 0xFC, 0x06, 0xF8, 0x0A, 0xF4, 0x0E, 0xF0, 0x12, 0xEC, 0x16, 0xE8, 0x1A, 0xE4, 0x1E, 0xE0), + Vector128.Create((byte)(0x01), 0xFE, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0) << 1 + ); + + Assert.Equal( + Vector128.Create((ushort)(0x0002), 0xFFFC, 0x0006, 0xFFF8, 0x000A, 0xFFF4, 0x000E, 0xFFF0), + Vector128.Create((ushort)(0x0001), 0xFFFE, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8) << 1 + ); + + Assert.Equal( + Vector128.Create((uint)(0x0000_0002), 0xFFFF_FFFC, 0x0000_0006, 0xFFFF_FFF8), + Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFE, 0x0000_0003, 0xFFFF_FFFC) << 1 + ); + + Assert.Equal( + Vector128.Create((ulong)(0x0000_0000_0000_0002), 0xFFFF_FFFF_FFFF_FFFC), + Vector128.Create((ulong)(0x0000_0000_0000_0001), 0xFFFF_FFFF_FFFF_FFFE) << 1 + ); + + Assert.Equal( + Vector128.Create((sbyte)(+2), 
-4, +6, -8, +10, -12, +14, -16, +18, -20, +22, -24, +26, -28, +30, -32), + Vector128.Create((sbyte)(+1), -2, +3, -4, +05, -06, +07, -08, +09, -10, +11, -12, +13, -14, +15, -16) << 1 + ); + + Assert.Equal( + Vector128.Create((short)(+2), -4, +6, -8, +10, -12, +14, -16), + Vector128.Create((short)(+1), -2, +3, -4, +05, -06, +07, -08) << 1 + ); + + Assert.Equal( + Vector128.Create((int)(+2), -4, +6, -8), + Vector128.Create((int)(+1), -2, +3, -4) << 1 + ); + + Assert.Equal( + Vector128.Create((long)(+2), -4), + Vector128.Create((long)(+1), -2) << 1 + ); + + Assert.Equal( + Vector128.Create((float)(+1.7014118E+38f), -0.0f, -1.1754944E-38f, -2.3509887E-38f), + Vector128.Create((float)(+1), -2, +3, -4) << 1 + ); + + Assert.Equal( + Vector128.Create((double)(+8.98846567431158E+307), -0.0), + Vector128.Create((double)(+1), -2) << 1 + ); + } + + [Fact] + public static void OrTests() + { + Assert.Equal( + Vector128.Create((byte)(0x01), 0xFF, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0), + Vector128.Create((byte)(0x01), 0xFE, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0) + | Vector128.Create((byte)(0x01), 0x01, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0) + ); + + Assert.Equal( + Vector128.Create((ushort)(0x0001), 0xFFFF, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8), + Vector128.Create((ushort)(0x0001), 0xFFFE, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8) + | Vector128.Create((ushort)(0x0001), 0x0001, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8) + ); + + Assert.Equal( + Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFF, 0x0000_0003, 0xFFFF_FFFC), + Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFE, 0x0000_0003, 0xFFFF_FFFC) + | Vector128.Create((uint)(0x0000_0001), 0x0000_0001, 0x0000_0003, 0xFFFF_FFFC) + ); + + Assert.Equal( + Vector128.Create((ulong)(0x0000_0000_0000_0001), 0xFFFF_FFFF_FFFF_FFFF), + Vector128.Create((ulong)(0x0000_0000_0000_0001), 
0xFFFF_FFFF_FFFF_FFFE) + | Vector128.Create((ulong)(0x0000_0000_0000_0001), 0x0000_0000_0000_0001) + ); + + Assert.Equal( + Vector128.Create((sbyte)(+1), -1, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16), + Vector128.Create((sbyte)(+1), -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16) + | Vector128.Create((sbyte)(+1), +1, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16) + ); + + Assert.Equal( + Vector128.Create((short)(+1), -1, +3, -4, +5, -6, +7, -8), + Vector128.Create((short)(+1), -2, +3, -4, +5, -6, +7, -8) + | Vector128.Create((short)(+1), +1, +3, -4, +5, -6, +7, -8) + ); + + Assert.Equal( + Vector128.Create((int)(+1), -1, +3, -4), + Vector128.Create((int)(+1), -2, +3, -4) + | Vector128.Create((int)(+1), +1, +3, -4) + ); + + Assert.Equal( + Vector128.Create((long)(+1), -1), + Vector128.Create((long)(+1), -2) + | Vector128.Create((long)(+1), +1) + ); + + Assert.Equal( + Vector128.Create((float)(+1), +float.NegativeInfinity, +3, -4), + Vector128.Create((float)(+1), -2, +3, -4) + | Vector128.Create((float)(+1), +1, +3, -4) + ); + + Assert.Equal( + Vector128.Create((double)(+1), +double.NegativeInfinity), + Vector128.Create((double)(+1), -2) + | Vector128.Create((double)(+1), +1) + ); + } + + [Fact] + public static void RightShiftTests() + { + Assert.Equal( + Vector128.Create((byte)(0x00), 0x7F, 0x01, 0x7E, 0x02, 0x7D, 0x03, 0x7C, 0x04, 0x7B, 0x05, 0x7A, 0x06, 0x79, 0x07, 0x78), + Vector128.Create((byte)(0x01), 0xFE, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0) >> 1 + ); + + Assert.Equal( + Vector128.Create((ushort)(0x0000), 0x7FFF, 0x0001, 0x7FFE, 0x0002, 0x7FFD, 0x0003, 0x7FFC), + Vector128.Create((ushort)(0x0001), 0xFFFE, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8) >> 1 + ); + + Assert.Equal( + Vector128.Create((uint)(0x0000_0000), 0x7FFF_FFFF, 0x0000_0001, 0x7FFF_FFFE), + Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFE, 0x0000_0003, 0xFFFF_FFFC) >> 1 + ); + + 
Assert.Equal( + Vector128.Create((ulong)(0x0000_0000_0000_0000), 0x7FFF_FFFF_FFFF_FFFF), + Vector128.Create((ulong)(0x0000_0000_0000_0001), 0xFFFF_FFFF_FFFF_FFFE) >> 1 + ); + + Assert.Equal( + Vector128.Create((sbyte)(+0), -1, +1, -2, +2, -3, +3, -4, +4, -05, +05, -06, +06, -07, +07, -08), + Vector128.Create((sbyte)(+1), -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16) >> 1 + ); + + Assert.Equal( + Vector128.Create((short)(+0), -1, +1, -2, +2, -3, +3, -4), + Vector128.Create((short)(+1), -2, +3, -4, +5, -6, +7, -8) >> 1 + ); + + Assert.Equal( + Vector128.Create((int)(+0), -1, +1, -2), + Vector128.Create((int)(+1), -2, +3, -4) >> 1 + ); + + Assert.Equal( + Vector128.Create((long)(+0), -1), + Vector128.Create((long)(+1), -2) >> 1 + ); + + Assert.Equal( + Vector128.Create((float)(+8.131516E-20f), -3.689349E+19f, +1.3552527E-19f, -5.5340232E+19f), + Vector128.Create((float)(+1), -2, +3, -4) >> 1 + ); + + Assert.Equal( + Vector128.Create((double)(+1.118751109680031E-154), -2.6815615859885194E+154), + Vector128.Create((double)(+1), -2) >> 1 + ); + } + + [Fact] + public static void UnsignedRightShiftTests() + { + Assert.Equal( + Vector128.Create((byte)(0x00), 0x7F, 0x01, 0x7E, 0x02, 0x7D, 0x03, 0x7C, 0x04, 0x7B, 0x05, 0x7A, 0x06, 0x79, 0x07, 0x78), + Vector128.Create((byte)(0x01), 0xFE, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0) >>> 1 + ); + + Assert.Equal( + Vector128.Create((ushort)(0x0000), 0x7FFF, 0x0001, 0x7FFE, 0x0002, 0x7FFD, 0x0003, 0x7FFC), + Vector128.Create((ushort)(0x0001), 0xFFFE, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8) >>> 1 + ); + + Assert.Equal( + Vector128.Create((uint)(0x0000_0000), 0x7FFF_FFFF, 0x0000_0001, 0x7FFF_FFFE), + Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFE, 0x0000_0003, 0xFFFF_FFFC) >>> 1 + ); + + Assert.Equal( + Vector128.Create((ulong)(0x0000_0000_0000_0000), 0x7FFF_FFFF_FFFF_FFFF), + Vector128.Create((ulong)(0x0000_0000_0000_0001), 0xFFFF_FFFF_FFFF_FFFE) >>> 1 + ); + + 
Assert.Equal( + Vector128.Create((sbyte)(+0), +127, +1, +126, +2, +125, +3, +124, +4, +123, +05, +122, +06, +121, +07, +120), + Vector128.Create((sbyte)(+1), -002, +3, -004, +5, -006, +7, -008, +9, -010, +11, -012, +13, -014, +15, -016) >>> 1 + ); + + Assert.Equal( + Vector128.Create((short)(+0), +32767, +1, +32766, +2, +32765, +3, +32764), + Vector128.Create((short)(+1), -00002, +3, -00004, +5, -00006, +7, -00008) >>> 1 + ); + + Assert.Equal( + Vector128.Create((int)(+0), +2147483647, +1, +2147483646), + Vector128.Create((int)(+1), -0000000002, +3, -0000000004) >>> 1 + ); + + Assert.Equal( + Vector128.Create((long)(+0), +9223372036854775807), + Vector128.Create((long)(+1), -0000000000000000002) >>> 1 + ); + + Assert.Equal( + Vector128.Create((float)(+8.131516E-20f), +3.689349E+19f, +1.3552527E-19f, +5.5340232E+19f), + Vector128.Create((float)(+1), -2, +3, -4) >>> 1 + ); + + Assert.Equal( + Vector128.Create((double)(+1.118751109680031E-154), +2.6815615859885194E+154), + Vector128.Create((double)(+1), -2) >>> 1 + ); + } + + [Fact] + public static void XorTests() + { + Assert.Equal( + Vector128.Create((byte)(0x00), 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), + Vector128.Create((byte)(0x01), 0xFE, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0) + ^ Vector128.Create((byte)(0x01), 0x01, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0) + ); + + Assert.Equal( + Vector128.Create((ushort)(0x0000), 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000), + Vector128.Create((ushort)(0x0001), 0xFFFE, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8) + ^ Vector128.Create((ushort)(0x0001), 0x0001, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8) + ); + + Assert.Equal( + Vector128.Create((uint)(0x0000_0000), 0xFFFF_FFFF, 0x0000_0000, 0x0000_0000), + Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFE, 0x0000_0003, 0xFFFF_FFFC) + ^ Vector128.Create((uint)(0x0000_0001), 
0x0000_0001, 0x0000_0003, 0xFFFF_FFFC) + ); + + Assert.Equal( + Vector128.Create((ulong)(0x0000_0000_0000_0000), 0xFFFF_FFFF_FFFF_FFFF), + Vector128.Create((ulong)(0x0000_0000_0000_0001), 0xFFFF_FFFF_FFFF_FFFE) + ^ Vector128.Create((ulong)(0x0000_0000_0000_0001), 0x0000_0000_0000_0001) + ); + + Assert.Equal( + Vector128.Create((sbyte)(+0), -1, +0, +0, +0, +0, +0, +0, +0, +00, +00, +00, +00, +00, +00, +00), + Vector128.Create((sbyte)(+1), -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16) + ^ Vector128.Create((sbyte)(+1), +1, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16) + ); + + Assert.Equal( + Vector128.Create((short)(+0), -1, +0, +0, +0, +0, +0, +0), + Vector128.Create((short)(+1), -2, +3, -4, +5, -6, +7, -8) + ^ Vector128.Create((short)(+1), +1, +3, -4, +5, -6, +7, -8) + ); + + Assert.Equal( + Vector128.Create((int)(+0), -1, +0, +0), + Vector128.Create((int)(+1), -2, +3, -4) + ^ Vector128.Create((int)(+1), +1, +3, -4) + ); + + Assert.Equal( + Vector128.Create((long)(+0), -1), + Vector128.Create((long)(+1), -2) + ^ Vector128.Create((long)(+1), +1) + ); + + Assert.Equal( + Vector128.Create((float)(+0), +float.NegativeInfinity, +0, +0), + Vector128.Create((float)(+1), -2, +3, -4) + ^ Vector128.Create((float)(+1), +1, +3, -4) + ); + + Assert.Equal( + Vector128.Create((double)(+0), +double.NegativeInfinity), + Vector128.Create((double)(+1), -2) + ^ Vector128.Create((double)(+1), +1) + ); + } }