From 3722cc9bf82d7dc75ce32830af00539f0c0fc793 Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Sun, 5 Jan 2025 13:48:56 +0200 Subject: [PATCH 1/7] Remove parameters that take only one value. --- .../src/System/Reflection/AssemblyName.cs | 43 ++++--------------- 1 file changed, 9 insertions(+), 34 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs b/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs index 521e804b6e8126..59d7fab7b54073 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs @@ -310,7 +310,7 @@ internal static string EscapeCodeBase(string? codebase) return string.Empty; int position = 0; - char[]? dest = EscapeString(codebase, 0, codebase.Length, null, ref position, true, c_DummyChar, c_DummyChar, c_DummyChar); + char[]? dest = EscapeString(codebase, null, ref position); if (dest == null) return codebase; @@ -318,34 +318,29 @@ internal static string EscapeCodeBase(string? codebase) } // This implementation of EscapeString has been copied from System.Private.Uri from the runtime repo - // - forceX characters are always escaped if found - // - rsvd character will remain unescaped // - // start - starting offset from input - // end - the exclusive ending offset in input // destPos - starting offset in dest for output, on return this will be an exclusive "end" in the output. // // In case "dest" has lack of space it will be reallocated by preserving the _whole_ content up to current destPos // // Returns null if nothing has to be escaped AND passed dest was null, otherwise the resulting array with the updated destPos // - internal static unsafe char[]? EscapeString(string input, int start, int end, char[]? dest, ref int destPos, - bool isUriString, char force1, char force2, char rsvd) + internal static unsafe char[]? EscapeString(string input, char[]? dest, ref int destPos) { - int i = start; - int prevInputPos = start; + int i = 0; + int prevInputPos = 0; byte* bytes = stackalloc byte[c_MaxUnicodeCharsReallocate * c_MaxUTF_8BytesPerUnicodeChar]; // 40*4=160 fixed (char* pStr = input) { - for (; i < end; ++i) + for (; i < input.Length; ++i) { char ch = pStr[i]; // a Unicode ? if (ch > '\x7F') { - short maxSize = (short)Math.Min(end - i, (int)c_MaxUnicodeCharsReallocate - 1); + short maxSize = (short)Math.Min(input.Length - i, (int)c_MaxUnicodeCharsReallocate - 1); short count = 1; for (; count < maxSize && pStr[i + count] > '\x7f'; ++count) ; @@ -354,7 +349,7 @@ internal static string EscapeCodeBase(string? codebase) if (pStr[i + count - 1] >= 0xD800 && pStr[i + count - 1] <= 0xDBFF) { // Should be a rare case where the app tries to feed an invalid Unicode surrogates pair - if (count == 1 || count == end - i) + if (count == 1 || count == input.Length - i) throw new FormatException(SR.Arg_FormatException); // need to grab one more char as a Surrogate except when it's a bogus input ++count; @@ -380,26 +375,7 @@ internal static string EscapeCodeBase(string? codebase) prevInputPos = i + 1; } - else if (ch == '%' && rsvd == '%') - { - // Means we don't reEncode '%' but check for the possible escaped sequence - dest = EnsureDestinationSize(pStr, dest, i, c_EncodedCharsPerByte, - c_MaxAsciiCharsReallocate * c_EncodedCharsPerByte, ref destPos, prevInputPos); - if (i + 2 < end && char.IsAsciiHexDigit(pStr[i + 1]) && char.IsAsciiHexDigit(pStr[i + 2])) - { - // leave it escaped - dest[destPos++] = '%'; - dest[destPos++] = pStr[i + 1]; - dest[destPos++] = pStr[i + 2]; - i += 2; - } - else - { - EscapeAsciiChar('%', dest, ref destPos); - } - prevInputPos = i + 1; - } - else if (ch == force1 || ch == force2 || (ch != rsvd && (isUriString ? !IsReservedUnreservedOrHash(ch) : !IsUnreserved(ch)))) + else if (!IsReservedUnreservedOrHash(ch)) { dest = EnsureDestinationSize(pStr, dest, i, c_EncodedCharsPerByte, c_MaxAsciiCharsReallocate * c_EncodedCharsPerByte, ref destPos, prevInputPos); @@ -411,7 +387,7 @@ internal static string EscapeCodeBase(string? codebase) if (prevInputPos != i) { // need to fill up the dest array ? - if (prevInputPos != start || dest != null) + if (prevInputPos != 0 || dest != null) dest = EnsureDestinationSize(pStr, dest, i, 0, 0, ref destPos, prevInputPos); } } @@ -466,7 +442,6 @@ internal static bool IsUnreserved(char c) return RFC3986UnreservedMarks.Contains(c); } - internal const char c_DummyChar = (char)0xFFFF; // An Invalid Unicode character used as a dummy char passed into the parameter private const short c_MaxAsciiCharsReallocate = 40; private const short c_MaxUnicodeCharsReallocate = 40; private const short c_MaxUTF_8BytesPerUnicodeChar = 4; From 06c7d8f585b9dc0bf7ecdc4cc60403e966138a87 Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Sun, 5 Jan 2025 13:54:39 +0200 Subject: [PATCH 2/7] Use `SearchValues`. --- .../src/System/Reflection/AssemblyName.cs | 23 +++---------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs b/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs index 59d7fab7b54073..6fbf9cf6e5b3aa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers; using System.ComponentModel; using System.Configuration.Assemblies; using System.Diagnostics.CodeAnalysis; @@ -375,7 +376,7 @@ internal static string EscapeCodeBase(string? codebase) prevInputPos = i + 1; } - else if (!IsReservedUnreservedOrHash(ch)) + else if (!UnreservedReserved.Contains(ch)) { dest = EnsureDestinationSize(pStr, dest, i, c_EncodedCharsPerByte, c_MaxAsciiCharsReallocate * c_EncodedCharsPerByte, ref destPos, prevInputPos); @@ -424,29 +425,11 @@ internal static void EscapeAsciiChar(char ch, char[] to, ref int pos) to[pos++] = HexConverter.ToCharUpper(ch); } - private static bool IsReservedUnreservedOrHash(char c) - { - if (IsUnreserved(c)) - { - return true; - } - return RFC3986ReservedMarks.Contains(c); - } - - internal static bool IsUnreserved(char c) - { - if (char.IsAsciiLetterOrDigit(c)) - { - return true; - } - return RFC3986UnreservedMarks.Contains(c); - } + private static readonly SearchValues UnreservedReserved = SearchValues.Create("!#$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~"); private const short c_MaxAsciiCharsReallocate = 40; private const short c_MaxUnicodeCharsReallocate = 40; private const short c_MaxUTF_8BytesPerUnicodeChar = 4; private const short c_EncodedCharsPerByte = 3; - private const string RFC3986ReservedMarks = ":/?#[]@!$&'()*+,;="; - private const string RFC3986UnreservedMarks = "-._~"; } } From 8bfdb90d5e5776ab3771ffa2a634a41be8622f6a Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Sun, 5 Jan 2025 14:28:41 +0200 Subject: [PATCH 3/7] Simplify `AssemblyName.EscapeCodeBase` and remove `unsafe`. --- .../src/System/Reflection/AssemblyName.cs | 151 +++++++----------- 1 file changed, 55 insertions(+), 96 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs b/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs index 6fbf9cf6e5b3aa..4a9b1dfb7bf5ee 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs @@ -4,6 +4,7 @@ using System.Buffers; using System.ComponentModel; using System.Configuration.Assemblies; +using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Runtime.Serialization; using System.Text; @@ -304,132 +305,90 @@ public static bool ReferenceMatchesDefinition(AssemblyName? reference, AssemblyN return refName.Equals(defName, StringComparison.OrdinalIgnoreCase); } - [RequiresAssemblyFiles("The code will return an empty string for assemblies embedded in a single-file app")] + // This implementation of Escape has been copied from UriHelper from System.Private.Uri and adapted to match AssemblyName's requirements. internal static string EscapeCodeBase(string? codebase) { if (codebase == null) return string.Empty; - int position = 0; - char[]? dest = EscapeString(codebase, null, ref position); - if (dest == null) + int indexOfFirstToEscape = codebase.AsSpan().IndexOfAnyExcept(UnreservedReserved); + if (indexOfFirstToEscape < 0) + { + // Nothing to escape, just return the original value. return codebase; + } + + // Otherwise, create a ValueStringBuilder to store the escaped data into, + // escape the rest, and concat the result with the characters we skipped above. + var vsb = new ValueStringBuilder(stackalloc char[StackallocThreshold]); - return new string(dest, 0, position); + // We may throw for very large inputs (when growing the ValueStringBuilder). + vsb.EnsureCapacity(codebase.Length); + + EscapeStringToBuilder(codebase.AsSpan(indexOfFirstToEscape), ref vsb); + + string result = string.Concat(codebase.AsSpan(0, indexOfFirstToEscape), vsb.ToString()); + vsb.Dispose(); + return result; } - // This implementation of EscapeString has been copied from System.Private.Uri from the runtime repo - // - // destPos - starting offset in dest for output, on return this will be an exclusive "end" in the output. - // - // In case "dest" has lack of space it will be reallocated by preserving the _whole_ content up to current destPos - // - // Returns null if nothing has to be escaped AND passed dest was null, otherwise the resulting array with the updated destPos - // - internal static unsafe char[]? EscapeString(string input, char[]? dest, ref int destPos) + internal static void EscapeStringToBuilder(scoped ReadOnlySpan stringToEscape, ref ValueStringBuilder vsb) { - int i = 0; - int prevInputPos = 0; - byte* bytes = stackalloc byte[c_MaxUnicodeCharsReallocate * c_MaxUTF_8BytesPerUnicodeChar]; // 40*4=160 + // Allocate enough stack space to hold any Rune's UTF8 encoding. + Span utf8Bytes = stackalloc byte[4]; - fixed (char* pStr = input) + while (!stringToEscape.IsEmpty) { - for (; i < input.Length; ++i) - { - char ch = pStr[i]; + char ch = stringToEscape[0]; - // a Unicode ? - if (ch > '\x7F') + if (!char.IsAscii(ch)) + { + if (Rune.DecodeFromUtf16(stringToEscape, out Rune r, out int charsConsumed) != OperationStatus.Done) { - short maxSize = (short)Math.Min(input.Length - i, (int)c_MaxUnicodeCharsReallocate - 1); - - short count = 1; - for (; count < maxSize && pStr[i + count] > '\x7f'; ++count) ; - - // Is the last a high surrogate? - if (pStr[i + count - 1] >= 0xD800 && pStr[i + count - 1] <= 0xDBFF) - { - // Should be a rare case where the app tries to feed an invalid Unicode surrogates pair - if (count == 1 || count == input.Length - i) - throw new FormatException(SR.Arg_FormatException); - // need to grab one more char as a Surrogate except when it's a bogus input - ++count; - } - - dest = EnsureDestinationSize(pStr, dest, i, - (short)(count * c_MaxUTF_8BytesPerUnicodeChar * c_EncodedCharsPerByte), - c_MaxUnicodeCharsReallocate * c_MaxUTF_8BytesPerUnicodeChar * c_EncodedCharsPerByte, - ref destPos, prevInputPos); - - short numberOfBytes = (short)Encoding.UTF8.GetBytes(pStr + i, count, bytes, - c_MaxUnicodeCharsReallocate * c_MaxUTF_8BytesPerUnicodeChar); - - // This is the only exception that built in UriParser can throw after a Uri ctor. - // Should not happen unless the app tries to feed an invalid Unicode string - if (numberOfBytes == 0) - throw new FormatException(SR.Arg_FormatException); + r = Rune.ReplacementChar; + } - i += (count - 1); + Debug.Assert(stringToEscape.EnumerateRunes() is { } e && e.MoveNext() && e.Current == r); + Debug.Assert(charsConsumed is 1 or 2); - for (count = 0; count < numberOfBytes; ++count) - EscapeAsciiChar((char)bytes[count], dest, ref destPos); + stringToEscape = stringToEscape.Slice(charsConsumed); - prevInputPos = i + 1; - } - else if (!UnreservedReserved.Contains(ch)) + // The rune is non-ASCII, so encode it as UTF8, and escape each UTF8 byte. + r.TryEncodeToUtf8(utf8Bytes, out int bytesWritten); + foreach (byte b in utf8Bytes.Slice(0, bytesWritten)) { - dest = EnsureDestinationSize(pStr, dest, i, c_EncodedCharsPerByte, - c_MaxAsciiCharsReallocate * c_EncodedCharsPerByte, ref destPos, prevInputPos); - EscapeAsciiChar(ch, dest, ref destPos); - prevInputPos = i + 1; + PercentEncodeByte(b, ref vsb); } } - - if (prevInputPos != i) + else if (!UnreservedReserved.Contains(ch)) { - // need to fill up the dest array ? - if (prevInputPos != 0 || dest != null) - dest = EnsureDestinationSize(pStr, dest, i, 0, 0, ref destPos, prevInputPos); + PercentEncodeByte((byte)ch, ref vsb); + stringToEscape = stringToEscape[1..]; } - } - - return dest; - } - - // - // ensure destination array has enough space and contains all the needed input stuff - // - private static unsafe char[] EnsureDestinationSize(char* pStr, char[]? dest, int currentInputPos, - short charsToAdd, short minReallocateChars, ref int destPos, int prevInputPos) - { - if (dest is null || dest.Length < destPos + (currentInputPos - prevInputPos) + charsToAdd) - { - // allocating or reallocating array by ensuring enough space based on maxCharsToAdd. - char[] newresult = new char[destPos + (currentInputPos - prevInputPos) + minReallocateChars]; + else + { + // We have a character we don't want to escape. It's likely there are more, do a vectorized search. + int charsToCopy = stringToEscape.IndexOfAnyExcept(UnreservedReserved); + if (charsToCopy < 0) + { + charsToCopy = stringToEscape.Length; + } + Debug.Assert(charsToCopy > 0); - if (dest is not null && destPos != 0) - Buffer.BlockCopy(dest, 0, newresult, 0, destPos << 1); - dest = newresult; + vsb.Append(stringToEscape.Slice(0, charsToCopy)); + stringToEscape = stringToEscape.Slice(charsToCopy); + } } - - // ensuring we copied everything form the input string left before last escaping - while (prevInputPos != currentInputPos) - dest[destPos++] = pStr[prevInputPos++]; - return dest; } - internal static void EscapeAsciiChar(char ch, char[] to, ref int pos) + internal static void PercentEncodeByte(byte ch, ref ValueStringBuilder vsb) { - to[pos++] = '%'; - to[pos++] = HexConverter.ToCharUpper(ch >> 4); - to[pos++] = HexConverter.ToCharUpper(ch); + vsb.Append('%'); + HexConverter.ToCharsBuffer(ch, vsb.AppendSpan(2), 0, HexConverter.Casing.Upper); } private static readonly SearchValues UnreservedReserved = SearchValues.Create("!#$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~"); - private const short c_MaxAsciiCharsReallocate = 40; - private const short c_MaxUnicodeCharsReallocate = 40; - private const short c_MaxUTF_8BytesPerUnicodeChar = 4; - private const short c_EncodedCharsPerByte = 3; + private const int StackallocThreshold = 512; } } From 392ceb2be79acb6b84ce91dc05e67c7943ab9737 Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Sun, 5 Jan 2025 22:58:14 +0200 Subject: [PATCH 4/7] Lazily initialize `SearchValues`. --- .../src/System/Reflection/AssemblyName.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs b/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs index 4a9b1dfb7bf5ee..6eea8d074e0ffc 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs @@ -387,7 +387,7 @@ internal static void PercentEncodeByte(byte ch, ref ValueStringBuilder vsb) HexConverter.ToCharsBuffer(ch, vsb.AppendSpan(2), 0, HexConverter.Casing.Upper); } - private static readonly SearchValues UnreservedReserved = SearchValues.Create("!#$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~"); + private static SearchValues UnreservedReserved => field ??= SearchValues.Create("!#$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~"); private const int StackallocThreshold = 512; } From 7eac9deadadef8bab9be5adb22404d2d5e9361b6 Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Mon, 6 Jan 2025 02:47:54 +0200 Subject: [PATCH 5/7] Suppress nullability warning. --- .../src/System/Reflection/AssemblyName.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs b/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs index 6eea8d074e0ffc..4bca651243ab4b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs @@ -387,7 +387,9 @@ internal static void PercentEncodeByte(byte ch, ref ValueStringBuilder vsb) HexConverter.ToCharsBuffer(ch, vsb.AppendSpan(2), 0, HexConverter.Casing.Upper); } +#pragma warning disable CS9264 // nullability of `field`: https://github.com/dotnet/csharplang/issues/8425 private static SearchValues UnreservedReserved => field ??= SearchValues.Create("!#$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~"); +#pragma warning restore CS9264 private const int StackallocThreshold = 512; } From cfbb548d9f74e0beead14675483863157333daeb Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Mon, 6 Jan 2025 18:14:29 +0200 Subject: [PATCH 6/7] Address PR feedback. --- .../src/System/Reflection/AssemblyName.cs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs b/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs index 4bca651243ab4b..2921e8733d9e2b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs @@ -363,7 +363,7 @@ internal static void EscapeStringToBuilder(scoped ReadOnlySpan stringToEsc else if (!UnreservedReserved.Contains(ch)) { PercentEncodeByte((byte)ch, ref vsb); - stringToEscape = stringToEscape[1..]; + stringToEscape = stringToEscape.Slice(1); } else { @@ -387,9 +387,8 @@ internal static void PercentEncodeByte(byte ch, ref ValueStringBuilder vsb) HexConverter.ToCharsBuffer(ch, vsb.AppendSpan(2), 0, HexConverter.Casing.Upper); } -#pragma warning disable CS9264 // nullability of `field`: https://github.com/dotnet/csharplang/issues/8425 + [field: AllowNull] private static SearchValues UnreservedReserved => field ??= SearchValues.Create("!#$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~"); -#pragma warning restore CS9264 private const int StackallocThreshold = 512; } From c9ba1e2c2e6b1e9e049ae48ff4ccbe5623a6a562 Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Mon, 20 Jan 2025 02:53:38 +0200 Subject: [PATCH 7/7] Remove unnecessary allocation. --- .../src/System/Reflection/AssemblyName.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs b/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs index 2921e8733d9e2b..f7d96dd1721912 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs @@ -327,7 +327,7 @@ internal static string EscapeCodeBase(string? codebase) EscapeStringToBuilder(codebase.AsSpan(indexOfFirstToEscape), ref vsb); - string result = string.Concat(codebase.AsSpan(0, indexOfFirstToEscape), vsb.ToString()); + string result = string.Concat(codebase.AsSpan(0, indexOfFirstToEscape), vsb.AsSpan()); vsb.Dispose(); return result; }