diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index d1c7d5ee8a726c..31aac455cd071d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -2718,6 +2718,24 @@ public static unsafe void StoreAligned(this Vector128 source, T* destinati public static unsafe void StoreAlignedNonTemporal(this Vector128 source, T* destination) where T : unmanaged => source.StoreAligned(destination); + /// + /// Stores the lower 64 bits of source to the memory destination at destination[elementOffset] + /// + /// The type of the elements in the vector. + /// The vector that will be stored. + /// The destination to which elementOffset will be added before the vector will be stored. + /// The element offset from destination from which the vector will be stored. + /// + /// Uses double instead of long to get a single instruction instead of storing temps in a general-purpose register (or on the stack) + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void StoreLowerUnsafe(this Vector128 source, ref T destination, nuint elementOffset = 0) + where T : struct + { + ref byte address = ref Unsafe.As(ref Unsafe.Add(ref destination, elementOffset)); + Unsafe.WriteUnaligned(ref address, source.AsDouble().ToScalar()); + } + /// Stores a vector at the given destination. /// The type of the elements in the vector. /// The vector that will be stored. 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.CaseConversion.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.CaseConversion.cs index c226161ec5749f..a9cdc30f570e06 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.CaseConversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.CaseConversion.cs @@ -463,41 +463,6 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD return i; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe void Widen8To16AndAndWriteTo(Vector128 narrowVector, char* pDest, nuint destOffset) - { - if (Vector256.IsHardwareAccelerated) - { - Vector256 wide = Vector256.WidenLower(narrowVector.ToVector256Unsafe()); - wide.StoreUnsafe(ref *(ushort*)pDest, destOffset); - } - else - { - Vector128.WidenLower(narrowVector).StoreUnsafe(ref *(ushort*)pDest, destOffset); - Vector128.WidenUpper(narrowVector).StoreUnsafe(ref *(ushort*)pDest, destOffset + 8); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe void Narrow16To8AndAndWriteTo(Vector128 wideVector, byte* pDest, nuint destOffset) - { - Vector128 narrow = Vector128.Narrow(wideVector, wideVector); - - if (Sse2.IsSupported) - { - // MOVQ is supported even on x86, unaligned accesses allowed - Sse2.StoreScalar((ulong*)(pDest + destOffset), narrow.AsUInt64()); - } - else if (Vector64.IsHardwareAccelerated) - { - narrow.GetLower().StoreUnsafe(ref *pDest, destOffset); - } - else - { - Unsafe.WriteUnaligned(pDest + destOffset, narrow.AsUInt64().ToScalar()); - } - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe void ChangeWidthAndWriteTo(Vector128 vector, TTo* pDest, nuint elementOffset) where TFrom : unmanaged @@ -524,12 +489,9 @@ private static unsafe void ChangeWidthAndWriteTo(Vector128 ve } else if (sizeof(TFrom) == 2 && sizeof(TTo) == 1) { - // narrowing operation required - // since we know data is all-ASCII, special-case 
SSE2 to avoid unneeded PAND in Narrow call - Vector128 narrow = (Sse2.IsSupported) - ? Sse2.PackUnsignedSaturate(vector.AsInt16(), vector.AsInt16()) - : Vector128.Narrow(vector.AsUInt16(), vector.AsUInt16()); - narrow.GetLower().StoreUnsafe(ref *(byte*)pDest, elementOffset); + // narrowing operation required, we know data is all-ASCII so use extract helper + Vector128 narrow = ExtractAsciiVector(vector.AsUInt16(), vector.AsUInt16()); + narrow.StoreLowerUnsafe(ref *(byte*)pDest, elementOffset); } else { @@ -556,25 +518,6 @@ private static unsafe Vector128 SignedLessThan(Vector128 left, Vector12 } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe Vector128 NarrowOrWidenLowerVectorUnsigned(Vector128 vector) - where TFrom : unmanaged - where TTo : unmanaged - { - if (sizeof(TFrom) == 1 && sizeof(TTo) == 2) - { - return Vector128.WidenLower(vector.AsByte()).As(); - } - else if (sizeof(TFrom) == 2 && sizeof(TTo) == 1) - { - return Vector128.Narrow(vector.AsUInt16(), vector.AsUInt16()).As(); - } - else - { - throw new NotSupportedException(); - } - } - private struct ToUpperConversion { } private struct ToLowerConversion { } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs index 4acf6e82baa6ac..28df8bb5a31d4a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs @@ -1510,6 +1510,7 @@ private static Vector128 ExtractAsciiVector(Vector128 vectorFirst, } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe nuint NarrowUtf16ToAscii_Intrinsified(char* pUtf16Buffer, byte* pAsciiBuffer, nuint elementCount) { // This method contains logic optimized using vector instructions for both x64 and Arm64. 
@@ -1542,7 +1543,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified(char* pUtf16Buffer, ref byte asciiBuffer = ref *pAsciiBuffer; Vector128 asciiVector = ExtractAsciiVector(utf16VectorFirst, utf16VectorFirst); - asciiVector.GetLower().StoreUnsafe(ref asciiBuffer); + asciiVector.StoreLowerUnsafe(ref asciiBuffer, 0); nuint currentOffsetInElements = SizeOfVector128 / 2; // we processed 8 elements so far // We're going to get the best performance when we have aligned writes, so we'll take the @@ -1569,7 +1570,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified(char* pUtf16Buffer, // Turn the 8 ASCII chars we just read into 8 ASCII bytes, then copy it to the destination. asciiVector = ExtractAsciiVector(utf16VectorFirst, utf16VectorFirst); - asciiVector.GetLower().StoreUnsafe(ref asciiBuffer, currentOffsetInElements); + asciiVector.StoreLowerUnsafe(ref asciiBuffer, currentOffsetInElements); } // Calculate how many elements we wrote in order to get pAsciiBuffer to its next alignment @@ -1622,7 +1623,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified(char* pUtf16Buffer, Debug.Assert(((nuint)pAsciiBuffer + currentOffsetInElements) % sizeof(ulong) == 0, "Destination should be ulong-aligned."); asciiVector = ExtractAsciiVector(utf16VectorFirst, utf16VectorFirst); - asciiVector.GetLower().StoreUnsafe(ref asciiBuffer, currentOffsetInElements); + asciiVector.StoreLowerUnsafe(ref asciiBuffer, currentOffsetInElements); currentOffsetInElements += SizeOfVector128 / 2; goto Finish;