Adding more SIMD constant folding support (#82190)
* Adding more SIMD constant folding support

* Adding tests for the new SIMD constant folding paths

* Ensure bitcasting float/double is using well-defined behavior
tannergooding authored Feb 23, 2023
1 parent 2ffe0fe commit 7219954
Showing 6 changed files with 1,687 additions and 100 deletions.
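
The third commit-message bullet, about well-defined bitcasting, refers to reinterpreting floating-point bits through memcpy rather than pointer type-punning. A minimal standalone sketch of that pattern (the function name here is illustrative, not part of the change):

#include <cstdint>
#include <cstring>

// Reinterpret a float's bits as an integer. memcpy between objects of the
// same size is well-defined in C++, unlike *(uint32_t*)&value or a union read.
static uint32_t FloatToBitsSketch(float value)
{
    uint32_t result;
    std::memcpy(&result, &value, sizeof(result));
    return result;
}
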
172 changes: 170 additions & 2 deletions src/coreclr/jit/simd.h
@@ -149,6 +149,39 @@ struct simd32_t
}
};

template <typename TBase>
TBase EvaluateUnaryScalarSpecialized(genTreeOps oper, TBase arg0)
{
switch (oper)
{
case GT_NOT:
{
return ~arg0;
}

default:
{
unreached();
}
}
}

template <>
inline float EvaluateUnaryScalarSpecialized<float>(genTreeOps oper, float arg0)
{
uint32_t arg0Bits = BitOperations::SingleToUInt32Bits(arg0);
uint32_t resultBits = EvaluateUnaryScalarSpecialized<uint32_t>(oper, arg0Bits);
return BitOperations::UInt32BitsToSingle(resultBits);
}

template <>
inline double EvaluateUnaryScalarSpecialized<double>(genTreeOps oper, double arg0)
{
uint64_t arg0Bits = BitOperations::DoubleToUInt64Bits(arg0);
uint64_t resultBits = EvaluateUnaryScalarSpecialized<uint64_t>(oper, arg0Bits);
return BitOperations::UInt64BitsToDouble(resultBits);
}

template <typename TBase>
TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0)
{
@@ -161,7 +194,7 @@ TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0)

default:
{
unreached();
return EvaluateUnaryScalarSpecialized<TBase>(oper, arg0);
}
}
}
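
The float and double specializations above fold a bitwise operator by round-tripping through the value's integer bits. A standalone analogue of what folding GT_NOT over a float element amounts to (illustrative name, outside the JIT):

#include <cstdint>
#include <cstring>

// Standalone analogue: apply bitwise NOT to a float's underlying bits and
// reinterpret the result as a float again.
static float NotFloatSketch(float arg0)
{
    uint32_t bits;
    std::memcpy(&bits, &arg0, sizeof(bits));
    bits = ~bits;
    float result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
}
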
@@ -268,6 +301,119 @@ void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
}
}

template <typename TBase>
TBase EvaluateBinaryScalarRSZ(TBase arg0, TBase arg1)
{
return arg0 >> (arg1 & ((sizeof(TBase) * 8) - 1));
}

template <>
inline int8_t EvaluateBinaryScalarRSZ<int8_t>(int8_t arg0, int8_t arg1)
{
uint8_t arg0Bits = static_cast<uint8_t>(arg0);
uint8_t arg1Bits = static_cast<uint8_t>(arg1);

uint8_t resultBits = EvaluateBinaryScalarRSZ<uint8_t>(arg0Bits, arg1Bits);
return static_cast<int8_t>(resultBits);
}

template <>
inline int16_t EvaluateBinaryScalarRSZ<int16_t>(int16_t arg0, int16_t arg1)
{
uint16_t arg0Bits = static_cast<uint16_t>(arg0);
uint16_t arg1Bits = static_cast<uint16_t>(arg1);

uint16_t resultBits = EvaluateBinaryScalarRSZ<uint16_t>(arg0Bits, arg1Bits);
return static_cast<int16_t>(resultBits);
}

template <>
inline int32_t EvaluateBinaryScalarRSZ<int32_t>(int32_t arg0, int32_t arg1)
{
uint32_t arg0Bits = static_cast<uint32_t>(arg0);
uint32_t arg1Bits = static_cast<uint32_t>(arg1);

uint32_t resultBits = EvaluateBinaryScalarRSZ<uint32_t>(arg0Bits, arg1Bits);
return static_cast<int32_t>(resultBits);
}

template <>
inline int64_t EvaluateBinaryScalarRSZ<int64_t>(int64_t arg0, int64_t arg1)
{
uint64_t arg0Bits = static_cast<uint64_t>(arg0);
uint64_t arg1Bits = static_cast<uint64_t>(arg1);

uint64_t resultBits = EvaluateBinaryScalarRSZ<uint64_t>(arg0Bits, arg1Bits);
return static_cast<int64_t>(resultBits);
}
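
The signed specializations above exist because `>>` on a signed operand in C++ is an arithmetic (sign-extending) shift, while RSZ must shift in zero bits; casting to the unsigned counterpart forces the zero fill. A small contrast (illustrative, not JIT code):

#include <cstdint>

// For int8_t, -2 has the bit pattern 0xFE. The arithmetic shift sign-extends
// (here -1, 0xFF); the logical shift used for RSZ gives 127 (0x7F).
static void ShiftContrastSketch()
{
    int8_t value      = -2;
    int8_t arithmetic = static_cast<int8_t>(value >> 1);
    int8_t logical    = static_cast<int8_t>(static_cast<uint8_t>(value) >> 1);
    (void)arithmetic; // -1
    (void)logical;    // 127
}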

template <typename TBase>
TBase EvaluateBinaryScalarSpecialized(genTreeOps oper, TBase arg0, TBase arg1)
{
switch (oper)
{
case GT_AND:
{
return arg0 & arg1;
}

case GT_AND_NOT:
{
return arg0 & ~arg1;
}

case GT_LSH:
{
return arg0 << (arg1 & ((sizeof(TBase) * 8) - 1));
}

case GT_OR:
{
return arg0 | arg1;
}

case GT_RSH:
{
return arg0 >> (arg1 & ((sizeof(TBase) * 8) - 1));
}

case GT_RSZ:
{
return EvaluateBinaryScalarRSZ<TBase>(arg0, arg1);
}

case GT_XOR:
{
return arg0 ^ arg1;
}

default:
{
unreached();
}
}
}

template <>
inline float EvaluateBinaryScalarSpecialized<float>(genTreeOps oper, float arg0, float arg1)
{
uint32_t arg0Bits = BitOperations::SingleToUInt32Bits(arg0);
uint32_t arg1Bits = BitOperations::SingleToUInt32Bits(arg1);

uint32_t resultBits = EvaluateBinaryScalarSpecialized<uint32_t>(oper, arg0Bits, arg1Bits);
return BitOperations::UInt32BitsToSingle(resultBits);
}

template <>
inline double EvaluateBinaryScalarSpecialized<double>(genTreeOps oper, double arg0, double arg1)
{
uint64_t arg0Bits = BitOperations::DoubleToUInt64Bits(arg0);
uint64_t arg1Bits = BitOperations::DoubleToUInt64Bits(arg1);

uint64_t resultBits = EvaluateBinaryScalarSpecialized<uint64_t>(oper, arg0Bits, arg1Bits);
return BitOperations::UInt64BitsToDouble(resultBits);
}
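
Routing GT_AND, GT_OR, and GT_XOR for float and double through their bits matches how bitwise vector instructions treat floating-point lanes; clearing the sign bit, for instance, yields the absolute value. A standalone sketch of that idea (illustrative name):

#include <cstdint>
#include <cstring>

// AND-ing a float's bits with 0x7FFFFFFF clears the sign bit, the usual way a
// vectorized fabs is expressed; the specialization above lets such an
// operation fold element by element.
static float AbsViaAndSketch(float arg0)
{
    uint32_t bits;
    std::memcpy(&bits, &arg0, sizeof(bits));
    bits &= UINT32_C(0x7FFFFFFF);
    float result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
}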

template <typename TBase>
TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
{
@@ -278,14 +424,24 @@ TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
return arg0 + arg1;
}

case GT_DIV:
{
return arg0 / arg1;
}

case GT_MUL:
{
return arg0 * arg1;
}

case GT_SUB:
{
return arg0 - arg1;
}

default:
{
unreached();
return EvaluateBinaryScalarSpecialized<TBase>(oper, arg0, arg1);
}
}
}
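
Note that the GT_LSH and GT_RSH cases mask the shift amount with `(sizeof(TBase) * 8) - 1`, so a count at or beyond the element width wraps instead of invoking undefined behavior in C++. A quick illustration (illustrative name):

#include <cstdint>

// Shifting a 32-bit value by 33 would be undefined behavior in C++; masking
// the count with 31 turns it into a shift by 1, mirroring the folding above.
static uint32_t MaskedShiftSketch(uint32_t arg0, uint32_t arg1)
{
    return arg0 << (arg1 & ((sizeof(uint32_t) * 8) - 1));
}
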
@@ -395,6 +551,18 @@ void EvaluateBinarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
}
}

template <typename TSimd, typename TBase>
void BroadcastConstantToSimd(TSimd* result, TBase arg0)
{
uint32_t count = sizeof(TSimd) / sizeof(TBase);

for (uint32_t i = 0; i < count; i++)
{
// Safely execute `result[i] = arg0`
memcpy(&result->u8[i * sizeof(TBase)], &arg0, sizeof(TBase));
}
}
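
BroadcastConstantToSimd writes the constant into every lane through the SIMD type's raw byte view (`u8`), again via memcpy. A hedged usage sketch with a stand-in 16-byte vector type (the struct and names are illustrative, not the JIT's simd16_t):

#include <cstdint>
#include <cstring>

struct Simd16Sketch
{
    uint8_t u8[16]; // raw byte view, mirroring the u8 member of the JIT simd types
};

template <typename TSimd, typename TBase>
static void BroadcastSketch(TSimd* result, TBase arg0)
{
    uint32_t count = sizeof(TSimd) / sizeof(TBase);

    for (uint32_t i = 0; i < count; i++)
    {
        std::memcpy(&result->u8[i * sizeof(TBase)], &arg0, sizeof(TBase));
    }
}

// Usage: Simd16Sketch v; BroadcastSketch<Simd16Sketch, uint32_t>(&v, 0x2Au);
// fills all four 32-bit lanes with 0x2A.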

#ifdef FEATURE_SIMD

#ifdef TARGET_XARCH
64 changes: 64 additions & 0 deletions src/coreclr/jit/utils.cpp
@@ -2645,6 +2645,22 @@ uint32_t BitOperations::BitScanReverse(uint64_t value)
#endif
}

//------------------------------------------------------------------------
// BitOperations::DoubleToUInt64Bits: Gets the underlying bits for a double-precision floating-point value.
//
// Arguments:
// value - The number to convert
//
// Return Value:
// The underlying bits for value.
//
uint64_t BitOperations::DoubleToUInt64Bits(double value)
{
uint64_t result;
memcpy(&result, &value, sizeof(double));
return result;
}

//------------------------------------------------------------------------
// BitOperations::LeadingZeroCount: Count the number of leading zero bits in a mask.
//
@@ -2932,6 +2948,22 @@ uint64_t BitOperations::RotateRight(uint64_t value, uint32_t offset)
return (value >> (offset & 0x3F)) | (value << ((64 - offset) & 0x3F));
}

//------------------------------------------------------------------------
// BitOperations::SingleToUInt32Bits: Gets the underlying bits for a single-precision floating-point value.
//
// Arguments:
// value - The number to convert
//
// Return Value:
// The underlying bits for value.
//
uint32_t BitOperations::SingleToUInt32Bits(float value)
{
uint32_t result;
memcpy(&result, &value, sizeof(float));
return result;
}

//------------------------------------------------------------------------
// BitOperations::TrailingZeroCount: Count the number of trailing zero bits in an integer value.
//
@@ -2980,6 +3012,38 @@ uint32_t BitOperations::TrailingZeroCount(uint64_t value)
#endif
}

//------------------------------------------------------------------------
// BitOperations::UInt32BitsToSingle: Gets a single-precision floating-point value from its underlying bits.
//
// Arguments:
// value - The underlying bit value.
//
// Return Value:
// The single-precision floating-point value represented by value's bits.
//
float BitOperations::UInt32BitsToSingle(uint32_t value)
{
float result;
memcpy(&result, &value, sizeof(uint32_t));
return result;
}

//------------------------------------------------------------------------
// BitOperations::UInt64BitsToDouble: Gets a double-precision floating-point value from its underlying bits.
//
// Arguments:
// value - The underlying bit value.
//
// Return Value:
// The double-precision floating-point value represented by value's bits.
//
double BitOperations::UInt64BitsToDouble(uint64_t value)
{
double result;
memcpy(&result, &value, sizeof(uint64_t));
return result;
}
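
The four bit-conversion helpers are exact inverses, so a value round-trips losslessly through its bit representation, including NaN payloads. A small round-trip sketch using the same memcpy pattern (standalone, not calling the JIT's BitOperations):

#include <cassert>
#include <cstdint>
#include <cstring>

// Round-trip a double through its bits; the bit patterns compare equal even
// for NaNs, where comparing the doubles themselves with == would be false.
static void RoundTripSketch(double value)
{
    uint64_t bits;
    std::memcpy(&bits, &value, sizeof(bits));

    double back;
    std::memcpy(&back, &bits, sizeof(back));

    uint64_t bitsAgain;
    std::memcpy(&bitsAgain, &back, sizeof(bitsAgain));
    assert(bits == bitsAgain);
}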

namespace MagicDivide
{
template <int TableBase = 0, int TableSize, typename Magic>
8 changes: 8 additions & 0 deletions src/coreclr/jit/utils.h
@@ -751,6 +751,8 @@ class BitOperations

static uint32_t BitScanReverse(uint64_t value);

static uint64_t DoubleToUInt64Bits(double value);

static uint32_t LeadingZeroCount(uint32_t value);

static uint32_t LeadingZeroCount(uint64_t value);
@@ -775,9 +777,15 @@

static uint64_t RotateRight(uint64_t value, uint32_t offset);

static uint32_t SingleToUInt32Bits(float value);

static uint32_t TrailingZeroCount(uint32_t value);

static uint32_t TrailingZeroCount(uint64_t value);

static float UInt32BitsToSingle(uint32_t value);

static double UInt64BitsToDouble(uint64_t value);
};

// The CLR requires that critical section locks be initialized via its ClrCreateCriticalSection API...but