diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/FormattingHelpers.CountDigits.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/FormattingHelpers.CountDigits.cs index 8235339820dfe..eb339b89576d6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/FormattingHelpers.CountDigits.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/FormattingHelpers.CountDigits.cs @@ -104,7 +104,8 @@ public static int CountDigits(uint value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int CountHexDigits(ulong value) { - return (64 - BitOperations.LeadingZeroCount(value | 1) + 3) >> 2; + // The number of hex digits is log16(value) + 1, or log2(value) / 4 + 1 + return (BitOperations.Log2(value) >> 2) + 1; } // Counts the number of trailing '0' digits in a decimal number. diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Utilities.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Utilities.cs index 7e1caa039b36c..9fe7233e16486 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Utilities.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Utilities.cs @@ -14,8 +14,13 @@ internal static class Utilities internal static int SelectBucketIndex(int bufferSize) { Debug.Assert(bufferSize >= 0); - uint bits = ((uint)bufferSize - 1) >> 4; - return 32 - BitOperations.LeadingZeroCount(bits); + + // Buffers are bucketed so that a request between 2^(n-1) + 1 and 2^n is given a buffer of 2^n + // Bucket index is log2(bufferSize - 1) with the exception that buffers between 1 and 16 bytes + // are combined, and the index is slid down by 3 to compensate. + // Zero is a valid bufferSize, and it is assigned the highest bucket index so that zero-length + // buffers are not retained by the pool. The pool will return the Array.Empty singleton for these. + return BitOperations.Log2((uint)bufferSize - 1 | 15) - 3; } [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs index fe457200e960d..dfade9d6900c1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs @@ -123,21 +123,18 @@ public static int LeadingZeroCount(ulong value) /// /// Returns the integer (floor) log of the specified value, base 2. - /// Note that by convention, input value 0 returns 0 since Log(0) is undefined. + /// Note that by convention, input value 0 returns 0 since log(0) is undefined. /// /// The value. [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static int Log2(uint value) { - // Enforce conventional contract 0->0 (Log(0) is undefined) - if (value == 0) - { - return 0; - } + // The 0->0 contract is fulfilled by setting the LSB to 1. + // Log(1) is 0, and setting the LSB for values > 1 does not change the log2 result. + value |= 1; // value lzcnt actual expected - // ..0000 32 0 0 (by convention, guard clause) // ..0001 31 31-31 0 // ..0010 30 31-30 1 // 0010.. 2 31-2 29 @@ -153,8 +150,8 @@ public static int Log2(uint value) return 31 ^ ArmBase.LeadingZeroCount(value); } - // BSR returns the answer we're looking for directly. - // However BSR is much slower than LZCNT on AMD processors, so we leave it as a fallback only. + // BSR returns the log2 result directly. However BSR is slower than LZCNT + // on AMD processors, so we leave it as a fallback only. if (X86Base.IsSupported) { return (int)X86Base.BitScanReverse(value); @@ -166,18 +163,14 @@ public static int Log2(uint value) /// /// Returns the integer (floor) log of the specified value, base 2. - /// Note that by convention, input value 0 returns 0 since Log(0) is undefined. + /// Note that by convention, input value 0 returns 0 since log(0) is undefined. /// /// The value. [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static int Log2(ulong value) { - // Enforce conventional contract 0->0 (Log(0) is undefined) - if (value == 0) - { - return 0; - } + value |= 1; if (Lzcnt.X64.IsSupported) { diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index f965f98ac7f5e..c20618c4482e3 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -1742,33 +1742,11 @@ private static int LocateLastFoundByte(Vector match) [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int LocateFirstFoundByte(ulong match) - { - if (Bmi1.X64.IsSupported) - { - return (int)(Bmi1.X64.TrailingZeroCount(match) >> 3); - } - else - { - // Flag least significant power of two bit - ulong powerOfTwoFlag = match ^ (match - 1); - // Shift all powers of two into the high byte and extract - return (int)((powerOfTwoFlag * XorPowerOfTwoToHighByte) >> 57); - } - } + => BitOperations.TrailingZeroCount(match) >> 3; [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int LocateLastFoundByte(ulong match) - { - return 7 - (BitOperations.LeadingZeroCount(match) >> 3); - } - - private const ulong XorPowerOfTwoToHighByte = (0x07ul | - 0x06ul << 8 | - 0x05ul << 16 | - 0x04ul << 24 | - 0x03ul << 32 | - 0x02ul << 40 | - 0x01ul << 48) + 1; + => BitOperations.Log2(match) >> 3; [MethodImpl(MethodImplOptions.AggressiveInlining)] private static ushort LoadUShort(ref byte start) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 0f21d07c107c0..b4d4dfa331167 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -984,27 +984,7 @@ private static int LocateFirstFoundChar(Vector match) [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int LocateFirstFoundChar(ulong match) - { - // TODO: Arm variants - if (Bmi1.X64.IsSupported) - { - return (int)(Bmi1.X64.TrailingZeroCount(match) >> 4); - } - else - { - unchecked - { - // Flag least significant power of two bit - ulong powerOfTwoFlag = match ^ (match - 1); - // Shift all powers of two into the high byte and extract - return (int)((powerOfTwoFlag * XorPowerOfTwoToHighChar) >> 49); - } - } - } - - private const ulong XorPowerOfTwoToHighChar = (0x03ul | - 0x02ul << 16 | - 0x01ul << 32) + 1; + => BitOperations.TrailingZeroCount(match) >> 4; // Vector sub-search adapted from https://github.com/aspnet/KestrelHttpServer/pull/1138 [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -1029,9 +1009,7 @@ private static int LocateLastFoundChar(Vector match) [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int LocateLastFoundChar(ulong match) - { - return 3 - (BitOperations.LeadingZeroCount(match) >> 4); - } + => BitOperations.Log2(match) >> 4; [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector LoadVector(ref char start, nint offset) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.Helpers.cs b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.Helpers.cs index 731d52ab822cc..71f984bbd4cfd 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.Helpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.Helpers.cs @@ -44,44 +44,19 @@ internal static uint CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiDat { Debug.Assert(!AllBytesInUInt32AreAscii(value), "Caller shouldn't provide an all-ASCII value."); - // Use BMI1 directly rather than going through BitOperations. We only see a perf gain here - // if we're able to emit a real tzcnt instruction; the software fallback used by BitOperations - // is too slow for our purposes since we can provide our own faster, specialized software fallback. - - if (Bmi1.IsSupported) - { - Debug.Assert(BitConverter.IsLittleEndian); - return Bmi1.TrailingZeroCount(value & UInt32HighBitsOnlyMask) >> 3; - } - - // Couldn't emit tzcnt, use specialized software fallback. - // The 'allBytesUpToNowAreAscii' DWORD uses bit twiddling to hold a 1 or a 0 depending - // on whether all processed bytes were ASCII. Then we accumulate all of the - // results to calculate how many consecutive ASCII bytes are present. - - value = ~value; - if (BitConverter.IsLittleEndian) { - // Read first byte - value >>= 7; - uint allBytesUpToNowAreAscii = value & 1; - uint numAsciiBytes = allBytesUpToNowAreAscii; - - // Read second byte - value >>= 8; - allBytesUpToNowAreAscii &= value; - numAsciiBytes += allBytesUpToNowAreAscii; - - // Read third byte - value >>= 8; - allBytesUpToNowAreAscii &= value; - numAsciiBytes += allBytesUpToNowAreAscii; - - return numAsciiBytes; + return (uint)BitOperations.TrailingZeroCount(value & UInt32HighBitsOnlyMask) >> 3; } else { + // Couldn't use tzcnt, use specialized software fallback. + // The 'allBytesUpToNowAreAscii' DWORD uses bit twiddling to hold a 1 or a 0 depending + // on whether all processed bytes were ASCII. Then we accumulate all of the + // results to calculate how many consecutive ASCII bytes are present. + + value = ~value; + // BinaryPrimitives.ReverseEndianness is only implemented as an intrinsic on // little-endian platforms, so using it in this big-endian path would be too // expensive. Instead we'll just change how we perform the shifts. diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs index c0bdbf03f7059..14c49c98a5f58 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs @@ -410,7 +410,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin if ((bufferLength & 8) != 0) { - if (Bmi1.X64.IsSupported) + if (UIntPtr.Size == sizeof(ulong)) { // If we can use 64-bit tzcnt to count the number of leading ASCII bytes, prefer it. @@ -418,10 +418,10 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin if (!AllBytesInUInt64AreAscii(candidateUInt64)) { // Clear everything but the high bit of each byte, then tzcnt. - // Remember the / 8 at the end to convert bit count to byte count. + // Remember to divide by 8 at the end to convert bit count to byte count. candidateUInt64 &= UInt64HighBitsOnlyMask; - pBuffer += (nuint)(Bmi1.X64.TrailingZeroCount(candidateUInt64) / 8); + pBuffer += (nuint)(BitOperations.TrailingZeroCount(candidateUInt64) >> 3); goto Finish; } } @@ -932,7 +932,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin if ((bufferLength & 4) != 0) { - if (Bmi1.X64.IsSupported) + if (UIntPtr.Size == sizeof(ulong)) { // If we can use 64-bit tzcnt to count the number of leading ASCII chars, prefer it. @@ -940,12 +940,12 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin if (!AllCharsInUInt64AreAscii(candidateUInt64)) { // Clear the low 7 bits (the ASCII bits) of each char, then tzcnt. - // Remember the / 8 at the end to convert bit count to byte count, + // Remember to divide by 8 at the end to convert bit count to byte count, // then the & ~1 at the end to treat a match in the high byte of // any char the same as a match in the low byte of that same char. candidateUInt64 &= 0xFF80FF80_FF80FF80ul; - pBuffer = (char*)((byte*)pBuffer + ((nuint)(Bmi1.X64.TrailingZeroCount(candidateUInt64) / 8) & ~(nuint)1)); + pBuffer = (char*)((byte*)pBuffer + ((nuint)(BitOperations.TrailingZeroCount(candidateUInt64) >> 3) & ~(nuint)1)); goto Finish; } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs index 8284828d53043..c487e1ec5e4df 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs @@ -144,7 +144,7 @@ private static void _ValidateAdditionalNIntDefinitions() do { - if (Sse2.IsSupported && Bmi1.IsSupported) + if (Sse2.IsSupported) { // pInputBuffer is 32-bit aligned but not necessary 128-bit aligned, so we're // going to perform an unaligned load. We don't necessarily care about aligning @@ -180,7 +180,6 @@ private static void _ValidateAdditionalNIntDefinitions() Debug.Assert(BitConverter.IsLittleEndian); Debug.Assert(Sse2.IsSupported); - Debug.Assert(Bmi1.IsSupported); // The 'mask' value will have a 0 bit for each ASCII byte we saw and a 1 bit // for each non-ASCII byte we saw. We can count the number of ASCII bytes, @@ -189,7 +188,7 @@ private static void _ValidateAdditionalNIntDefinitions() Debug.Assert(mask != 0); - pInputBuffer += Bmi1.TrailingZeroCount(mask); + pInputBuffer += BitOperations.TrailingZeroCount(mask); if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer) { goto ProcessRemainingBytesSlow;