-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Extend the list of recognized SearchValues<char>
field names in Regex
#107402
Conversation
Tagging subscribers to this area: @dotnet/area-system-text-regularexpressions |
@MihuBot regexdiff |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice, thanks. Does this provide any further insights into ways we may want to / be able to further optimize SearchValues (i.e. special-casing based on the frequency of various character classes)?
923 out of 18857 patterns have generated source code changes.
Examples of GeneratedRegex source diffs:
"[^a-zA-Z0-9_.]" (20636 uses)
[GeneratedRegex("[^a-zA-Z0-9_.]")]
{
// The pattern begins with a character in the set [^.0-9A-Z_a-z].
// Find the next occurrence. If it can't be found, there's no match.
- int i = inputSpan.Slice(pos).IndexOfAnyExcept(Utilities.s_ascii_40FF03FEFFFF87FEFFFF07);
+ int i = inputSpan.Slice(pos).IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndDotUnderscore);
if (i >= 0)
{
base.runtextpos = pos + i;
internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout;
/// <summary>Supports searching for characters in or not in ".0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz".</summary>
- internal static readonly SearchValues<char> s_ascii_40FF03FEFFFF87FEFFFF07 = SearchValues.Create(".0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz");
+ internal static readonly SearchValues<char> s_asciiLettersAndDigitsAndDotUnderscore = SearchValues.Create(".0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz");
}
} "\\A(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z ..." (5703 uses)[GeneratedRegex("\\A(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)\\Z", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant)] //{
charloop_starting_pos1 = pos;
- int iteration2 = slice.IndexOfAnyExcept(Utilities.s_nonAscii_2D5586687DC37F0329E3CA4127326E68B5A3A090B13B7834AEA7BFC4EDDE220F);
+ int iteration2 = slice.IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndDashKelvinSign);
if (iteration2 < 0)
{
iteration2 = slice.Length;
}
if (charloop_starting_pos1 >= charloop_ending_pos1 ||
- (charloop_ending_pos1 = inputSpan.Slice(charloop_starting_pos1, charloop_ending_pos1 - charloop_starting_pos1).LastIndexOfAny(Utilities.s_nonAscii_46E3FAA2E94950B9D41E9AB1B570CAB55D04A30009110072B4BC074D57272527)) < 0)
+ (charloop_ending_pos1 = inputSpan.Slice(charloop_starting_pos1, charloop_ending_pos1 - charloop_starting_pos1).LastIndexOfAny(Utilities.s_asciiLettersAndDigitsAndKelvinSign)) < 0)
{
goto LoopIterationNoMatch2;
}
//{
charloop_starting_pos2 = pos;
- int iteration3 = slice.IndexOfAnyExcept(Utilities.s_nonAscii_2D5586687DC37F0329E3CA4127326E68B5A3A090B13B7834AEA7BFC4EDDE220F);
+ int iteration3 = slice.IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndDashKelvinSign);
if (iteration3 < 0)
{
iteration3 = slice.Length;
}
if (charloop_starting_pos2 >= charloop_ending_pos2 ||
- (charloop_ending_pos2 = inputSpan.Slice(charloop_starting_pos2, charloop_ending_pos2 - charloop_starting_pos2).LastIndexOfAny(Utilities.s_nonAscii_46E3FAA2E94950B9D41E9AB1B570CAB55D04A30009110072B4BC074D57272527)) < 0)
+ (charloop_ending_pos2 = inputSpan.Slice(charloop_starting_pos2, charloop_ending_pos2 - charloop_starting_pos2).LastIndexOfAny(Utilities.s_asciiLettersAndDigitsAndKelvinSign)) < 0)
{
goto LoopIterationNoMatch3;
}
}
/// <summary>Supports searching for characters in or not in "-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzK".</summary>
- internal static readonly SearchValues<char> s_nonAscii_2D5586687DC37F0329E3CA4127326E68B5A3A090B13B7834AEA7BFC4EDDE220F = SearchValues.Create("-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzK");
+ internal static readonly SearchValues<char> s_asciiLettersAndDigitsAndDashKelvinSign = SearchValues.Create("-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzK");
/// <summary>Supports searching for characters in or not in "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzK".</summary>
- internal static readonly SearchValues<char> s_nonAscii_46E3FAA2E94950B9D41E9AB1B570CAB55D04A30009110072B4BC074D57272527 = SearchValues.Create("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzK");
+ internal static readonly SearchValues<char> s_asciiLettersAndDigitsAndKelvinSign = SearchValues.Create("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzK");
/// <summary>Supports searching for characters in or not in "!#$%&'*+-/0123456789=?ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz{|}~K".</summary>
internal static readonly SearchValues<char> s_nonAscii_7ADC7465A855D0F7DBF80EE65A2D8E62A35EDBB78D224DA31A8C190D13710E4D = SearchValues.Create("!#$%&'*+-/0123456789=?ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz{|}~K"); "(?<lang>[a-z]{2,8})(?:(?:\\-(?<script>[a-zA- ..." (5593 uses)[GeneratedRegex("(?<lang>[a-z]{2,8})(?:(?:\\-(?<script>[a-zA-Z]+))?\\-(?<reg>[A-Z]+))?", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant)] ReadOnlySpan<char> span = inputSpan.Slice(pos);
for (int i = 0; i < span.Length - 1; i++)
{
- int indexOfPos = span.Slice(i).IndexOfAny(Utilities.s_nonAscii_326E1FD0AD567A84CAD13F2BE521A57789829F59D59ABE37F9E111D0182B6601);
+ int indexOfPos = span.Slice(i).IndexOfAny(Utilities.s_asciiLettersAndKelvinSign);
if (indexOfPos < 0)
{
goto NoMatchFound;
//{
charloop_starting_pos1 = pos;
- int iteration1 = slice.IndexOfAnyExcept(Utilities.s_nonAscii_326E1FD0AD567A84CAD13F2BE521A57789829F59D59ABE37F9E111D0182B6601);
+ int iteration1 = slice.IndexOfAnyExcept(Utilities.s_asciiLettersAndKelvinSign);
if (iteration1 < 0)
{
iteration1 = slice.Length;
// Match a character in the set [A-Za-z\u212A] atomically at least once.
{
- int iteration2 = slice.IndexOfAnyExcept(Utilities.s_nonAscii_326E1FD0AD567A84CAD13F2BE521A57789829F59D59ABE37F9E111D0182B6601);
+ int iteration2 = slice.IndexOfAnyExcept(Utilities.s_asciiLettersAndKelvinSign);
if (iteration2 < 0)
{
iteration2 = slice.Length;
}
/// <summary>Supports searching for characters in or not in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzK".</summary>
- internal static readonly SearchValues<char> s_nonAscii_326E1FD0AD567A84CAD13F2BE521A57789829F59D59ABE37F9E111D0182B6601 = SearchValues.Create("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzK");
+ internal static readonly SearchValues<char> s_asciiLettersAndKelvinSign = SearchValues.Create("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzK");
}
} "%\\(\\s* (?<ITEM_SPECIFICATION>(?<ITEM_TYPE> ..." (3976 uses)[GeneratedRegex("%\\(\\s* (?<ITEM_SPECIFICATION>(?<ITEM_TYPE>[A-Za-z_][A-Za-z_0-9\\-]*)\\s*\\.\\s*)? (?<NAME>[A-Za-z_][A-Za-z_0-9\\-]*) \\s*\\)", RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace)] // Match a character in the set [-0-9A-Z_a-z] atomically any number of times.
{
- int iteration1 = slice.Slice(1).IndexOfAnyExcept(Utilities.s_ascii_20FF03FEFFFF87FEFFFF07);
+ int iteration1 = slice.Slice(1).IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndDashUnderscore);
if (iteration1 < 0)
{
iteration1 = slice.Length - 1;
// Match a character in the set [-0-9A-Z_a-z] atomically any number of times.
{
- int iteration4 = slice.Slice(1).IndexOfAnyExcept(Utilities.s_ascii_20FF03FEFFFF87FEFFFF07);
+ int iteration4 = slice.Slice(1).IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndDashUnderscore);
if (iteration4 < 0)
{
iteration4 = slice.Length - 1;
}
/// <summary>Supports searching for characters in or not in "-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz".</summary>
- internal static readonly SearchValues<char> s_ascii_20FF03FEFFFF87FEFFFF07 = SearchValues.Create("-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz");
+ internal static readonly SearchValues<char> s_asciiLettersAndDigitsAndDashUnderscore = SearchValues.Create("-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz");
/// <summary>Supports searching for the string "%(".</summary>
internal static readonly SearchValues<string> s_indexOfString_94B03BD84CB7586977A2210FE884942D27D98E0A78FE4AE6F6D7879F63DC7C3B = SearchValues.Create(["%("], StringComparison.Ordinal); "((?<=@\\(\\s*[A-Za-z_][A-Za-z_0-9\\-]*\\s*-> ..." (3976 uses)[GeneratedRegex("((?<=@\\(\\s*[A-Za-z_][A-Za-z_0-9\\-]*\\s*->\\s*'[^']*)%\\(\\s* (?<ITEM_SPECIFICATION>(?<ITEM_TYPE>[A-Za-z_][A-Za-z_0-9\\-]*)\\s*\\.\\s*)? (?<NAME>[A-Za-z_][A-Za-z_0-9\\-]*) \\s*\\)(?![^']*'(\\s*,\\s*'[^']*')?\\s*\\))) | ((?<!@\\(\\s*[A-Za-z_][A-Za-z_0-9\\-]*\\s*->\\s*'[^']*)%\\(\\s* (?<ITEM_SPECIFICATION>(?<ITEM_TYPE>[A-Za-z_][A-Za-z_0-9\\-]*)\\s*\\.\\s*)? (?<NAME>[A-Za-z_][A-Za-z_0-9\\-]*) \\s*\\)(?=[^']*'(\\s*,\\s*'[^']*')?\\s*\\))) | ((?<!@\\(\\s*[A-Za-z_][A-Za-z_0-9\\-]*\\s*->\\s*'[^']*)%\\(\\s* (?<ITEM_SPECIFICATION>(?<ITEM_TYPE>[A-Za-z_][A-Za-z_0-9\\-]*)\\s*\\.\\s*)? (?<NAME>[A-Za-z_][A-Za-z_0-9\\-]*) \\s*\\)(?![^']*'(\\s*,\\s*'[^']*')?\\s*\\)))", RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace)] // Match a character in the set [-0-9A-Z_a-z] atomically any number of times.
{
- int iteration6 = slice.Slice(1).IndexOfAnyExcept(Utilities.s_ascii_20FF03FEFFFF87FEFFFF07);
+ int iteration6 = slice.Slice(1).IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndDashUnderscore);
if (iteration6 < 0)
{
iteration6 = slice.Length - 1;
// Match a character in the set [-0-9A-Z_a-z] atomically any number of times.
{
- int iteration9 = slice.Slice(1).IndexOfAnyExcept(Utilities.s_ascii_20FF03FEFFFF87FEFFFF07);
+ int iteration9 = slice.Slice(1).IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndDashUnderscore);
if (iteration9 < 0)
{
iteration9 = slice.Length - 1;
// Match a character in the set [-0-9A-Z_a-z] atomically any number of times.
{
- int iteration22 = slice.Slice(1).IndexOfAnyExcept(Utilities.s_ascii_20FF03FEFFFF87FEFFFF07);
+ int iteration22 = slice.Slice(1).IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndDashUnderscore);
if (iteration22 < 0)
{
iteration22 = slice.Length - 1;
// Match a character in the set [-0-9A-Z_a-z] atomically any number of times.
{
- int iteration25 = slice.Slice(1).IndexOfAnyExcept(Utilities.s_ascii_20FF03FEFFFF87FEFFFF07);
+ int iteration25 = slice.Slice(1).IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndDashUnderscore);
if (iteration25 < 0)
{
iteration25 = slice.Length - 1;
// Match a character in the set [-0-9A-Z_a-z] atomically any number of times.
{
- int iteration38 = slice.Slice(1).IndexOfAnyExcept(Utilities.s_ascii_20FF03FEFFFF87FEFFFF07);
+ int iteration38 = slice.Slice(1).IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndDashUnderscore);
if (iteration38 < 0)
{
iteration38 = slice.Length - 1;
// Match a character in the set [-0-9A-Z_a-z] atomically any number of times.
{
- int iteration41 = slice.Slice(1).IndexOfAnyExcept(Utilities.s_ascii_20FF03FEFFFF87FEFFFF07);
+ int iteration41 = slice.Slice(1).IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndDashUnderscore);
if (iteration41 < 0)
{
iteration41 = slice.Length - 1;
}
/// <summary>Supports searching for characters in or not in "-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz".</summary>
- internal static readonly SearchValues<char> s_ascii_20FF03FEFFFF87FEFFFF07 = SearchValues.Create("-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz");
+ internal static readonly SearchValues<char> s_asciiLettersAndDigitsAndDashUnderscore = SearchValues.Create("-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz");
/// <summary>Supports searching for the string "%(".</summary>
internal static readonly SearchValues<string> s_indexOfString_94B03BD84CB7586977A2210FE884942D27D98E0A78FE4AE6F6D7879F63DC7C3B = SearchValues.Create(["%("], StringComparison.Ordinal); "(?<PREFIX>@\\(\\s*)\r\n (?<TY ..." (3976 uses)[GeneratedRegex("(?<PREFIX>@\\(\\s*)\r\n (?<TYPE>[A-Za-z_][A-Za-z_0-9\\-]*)\r\n (?<TRANSFORM_SPECIFICATION>(?<ARROW>\\s*->\\s*)(?<TRANSFORM>'[^']*'))\r\n (?<SEPARATOR_SPECIFICATION>\\s*,\\s*'[^']*')?\r\n (?<SUFFIX>\\s*\\))", RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace)] slice = inputSpan.Slice(pos);
charloop_starting_pos = pos;
- int iteration1 = slice.IndexOfAnyExcept(Utilities.s_ascii_20FF03FEFFFF87FEFFFF07);
+ int iteration1 = slice.IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndDashUnderscore);
if (iteration1 < 0)
{
iteration1 = slice.Length;
}
/// <summary>Supports searching for characters in or not in "-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz".</summary>
- internal static readonly SearchValues<char> s_ascii_20FF03FEFFFF87FEFFFF07 = SearchValues.Create("-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz");
+ internal static readonly SearchValues<char> s_asciiLettersAndDigitsAndDashUnderscore = SearchValues.Create("-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz");
/// <summary>Supports searching for the string "@(".</summary>
internal static readonly SearchValues<string> s_indexOfString_C7DF9B147331011DE7321CD7E62F0D799941853E78E1507F38B4C54A3047DBAA = SearchValues.Create(["@("], StringComparison.Ordinal); "@\\(\\s*\r\n ([A-Za-z_][A-Za- ..." (3976 uses)[GeneratedRegex("@\\(\\s*\r\n ([A-Za-z_][A-Za-z_0-9\\-]*)\r\n (\\s*->\\s*'[^']*')\r\n (\\s*,\\s*'[^']*')?\r\n \\s*\\)", RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace)] slice = inputSpan.Slice(pos);
charloop_starting_pos = pos;
- int iteration1 = slice.IndexOfAnyExcept(Utilities.s_ascii_20FF03FEFFFF87FEFFFF07);
+ int iteration1 = slice.IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndDashUnderscore);
if (iteration1 < 0)
{
iteration1 = slice.Length;
}
/// <summary>Supports searching for characters in or not in "-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz".</summary>
- internal static readonly SearchValues<char> s_ascii_20FF03FEFFFF87FEFFFF07 = SearchValues.Create("-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz");
+ internal static readonly SearchValues<char> s_asciiLettersAndDigitsAndDashUnderscore = SearchValues.Create("-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz");
/// <summary>Supports searching for the string "@(".</summary>
internal static readonly SearchValues<string> s_indexOfString_C7DF9B147331011DE7321CD7E62F0D799941853E78E1507F38B4C54A3047DBAA = SearchValues.Create(["@("], StringComparison.Ordinal); "^!([0-9A-Za-z_\\-]*!)?$" (3395 uses)[GeneratedRegex("^!([0-9A-Za-z_\\-]*!)?$")] // Match a character in the set [-0-9A-Z_a-z] atomically any number of times.
{
- int iteration = slice.IndexOfAnyExcept(Utilities.s_ascii_20FF03FEFFFF87FEFFFF07);
+ int iteration = slice.IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndDashUnderscore);
if (iteration < 0)
{
iteration = slice.Length;
}
/// <summary>Supports searching for characters in or not in "-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz".</summary>
- internal static readonly SearchValues<char> s_ascii_20FF03FEFFFF87FEFFFF07 = SearchValues.Create("-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz");
+ internal static readonly SearchValues<char> s_asciiLettersAndDigitsAndDashUnderscore = SearchValues.Create("-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz");
}
} "^[a-zA-Z0-9\\.]+$" (2606 uses)[GeneratedRegex("^[a-zA-Z0-9\\.]+$")] // Match a character in the set [.0-9A-Za-z] atomically at least once.
{
- int iteration = slice.IndexOfAnyExcept(Utilities.s_ascii_40FF03FEFFFF07FEFFFF07);
+ int iteration = slice.IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndDot);
if (iteration < 0)
{
iteration = slice.Length;
internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout;
/// <summary>Supports searching for characters in or not in ".0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz".</summary>
- internal static readonly SearchValues<char> s_ascii_40FF03FEFFFF07FEFFFF07 = SearchValues.Create(".0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
+ internal static readonly SearchValues<char> s_asciiLettersAndDigitsAndDot = SearchValues.Create(".0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
}
} "^[a-zA-Z0-9\\.\\-]+$" (2029 uses)[GeneratedRegex("^[a-zA-Z0-9\\.\\-]+$", RegexOptions.CultureInvariant)] // Match a character in the set [-.0-9A-Za-z] atomically at least once.
{
- int iteration = slice.IndexOfAnyExcept(Utilities.s_ascii_60FF03FEFFFF07FEFFFF07);
+ int iteration = slice.IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndDashDot);
if (iteration < 0)
{
iteration = slice.Length;
internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout;
/// <summary>Supports searching for characters in or not in "-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz".</summary>
- internal static readonly SearchValues<char> s_ascii_60FF03FEFFFF07FEFFFF07 = SearchValues.Create("-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
+ internal static readonly SearchValues<char> s_asciiLettersAndDigitsAndDashDot = SearchValues.Create("-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
}
}
For more diff examples, see https://gist.github.com/MihuBot/6ccb8c05f6a026fb0623092f15480f6d
Sample source code for further analysis:
const string JsonPath = "RegexResults-630.json";
// Download and cache the results on first run: if the JSON file is not already on disk,
// fetch the results archive and extract its "Results.json" entry to JsonPath.
if (!File.Exists(JsonPath))
{
    // Bind the client to a using declaration so it is disposed when this block exits.
    // Using declarations are disposed in reverse order, so the archive and the response
    // stream are released before the client that produced them.
    using var httpClient = new HttpClient();
    await using var archiveStream = await httpClient.GetStreamAsync("https://runtimeutils.blob.core.windows.net/artifacts/Ecf4gctAAAE/Results.zip");
    using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read);

    // First(...) throws InvalidOperationException if the archive has no "Results.json" entry.
    archive.Entries.First(e => e.Name == "Results.json").ExtractToFile(JsonPath);
}

// Read the cached JSON. IncludeFields = true makes the serializer bind public fields as well as properties.
using FileStream jsonFileStream = File.OpenRead(JsonPath);

// The null-forgiving operator assumes the file's root is a JSON array rather than the literal null.
RegexEntry[] entries = JsonSerializer.Deserialize<RegexEntry[]>(jsonFileStream, new JsonSerializerOptions { IncludeFields = true })!;
Console.WriteLine($"Working with {entries.Length} patterns");
/// <summary>
/// Positional record describing one regex: its pattern text, the <see cref="RegexOptions"/> it is used with, and a count.
/// </summary>
/// <remarks>
/// NOTE(review): <c>Count</c> is presumably the number of uses reported for the pattern — confirm against the data.
/// </remarks>
record KnownPattern(string Pattern, RegexOptions Options, int Count);
/// <summary>
/// One element of the <c>RegexEntry[]</c> array that the sample deserializes from the results JSON file.
/// Members marked <c>required</c> must be supplied; the nullable members may be null.
/// </summary>
sealed class RegexEntry
{
/// <summary>The pattern, options and count this entry refers to.</summary>
public required KnownPattern Regex { get; set; }
/// <summary>Required source text. NOTE(review): presumably the generated source from the main branch — confirm.</summary>
public required string MainSource { get; set; }
/// <summary>Required source text. NOTE(review): presumably the generated source with the PR applied — confirm.</summary>
public required string PrSource { get; set; }
/// <summary>Optional diff text; may be null. NOTE(review): presumably the complete diff between the two sources — confirm.</summary>
public string? FullDiff { get; set; }
/// <summary>Optional diff text; may be null. NOTE(review): presumably an abbreviated form of the diff — confirm.</summary>
public string? ShortDiff { get; set; }
/// <summary>
/// Optional array of (Name, Values) tuples; may be null.
/// NOTE(review): presumably the SearchValues&lt;char&gt; fields emitted for the pattern (field name and character set) — confirm.
/// </summary>
public (string Name, string Values)[]? SearchValuesOfChar { get; set; }
/// <summary>
/// Optional array of (Values, ComparisonType) tuples; may be null.
/// NOTE(review): presumably the SearchValues&lt;string&gt; fields emitted for the pattern — confirm.
/// </summary>
public (string[] Values, StringComparison ComparisonType)[]? SearchValuesOfString { get; set; }
} |
Nothing immediately stands out to me from the list of values that are being used, but having the automation in place at least makes it easier to query which patterns a given change in SearchValues would apply to, e.g. #106900 (comment) |
commit 7ae87de Author: Larry Ewing <[email protected]> Date: Mon Sep 9 22:11:12 2024 -0500 [wasm] more cases when looking up unmanaged delegates (dotnet#107113) Make the association between the wasm_native_to_interp_ftndescs generation and the lookup from unmanaged more robust so that we don't see problems like dotnet#107212 where the same slot was being reused for multiple methods with different signatures. To do this we change the Key(s) we use and fix the string escaping it relies on, and attempt to lookup by token first. Next , we rewrite the C code generation to make it easier to read and modify and mitigate some potentially negative memory side effects of that we introduce a gratuitous custom text writer that understands the idea of concatenated strings and use that where possible when building the output. Next, we change the import code generation to use binary rather than linear search for both the module and symbol. And finally, we update the ICall table generation to use the extensions. part of dotnet#104391 and dotnet#107212 commit 1808129 Author: Elinor Fung <[email protected]> Date: Mon Sep 9 20:03:34 2024 -0700 Remove FCThrowRes from AssemblyNative::IsDynamic (dotnet#107574) commit 5cb6a06 Author: Aman Khalid <[email protected]> Date: Tue Sep 10 02:38:23 2024 +0000 JIT: Add simple late layout pass (dotnet#107483) commit c762b75 Author: Martin Costello <[email protected]> Date: Tue Sep 10 03:15:53 2024 +0100 Add [DebuggerDisplay] to CancellationTokenSource (dotnet#105764) * Add [DebuggerDisplay] to CancellationTokenSource Add `[DebuggerDisplay]` to `CancellationTokenSource` to show whether cancelled or disposed. Relates to dotnet#105698. 
* Update src/libraries/System.Private.CoreLib/src/System/Threading/CancellationTokenSource.cs --------- Co-authored-by: Stephen Toub <[email protected]> commit b77b71e Author: Katelyn Gadd <[email protected]> Date: Mon Sep 9 17:40:14 2024 -0700 [wasm] Clean up some FIXMEs in the jiterpreter (dotnet#107562) * Cleanup some fixmes in the jiterpreter * Flow through size of the var in MINT_LDLOCA_S so jiterpreter can do accurate invalidation commit c21d90e Author: Pavel Savara <[email protected]> Date: Tue Sep 10 02:40:00 2024 +0200 [WASI] improve single-threaded threadpool (dotnet#107395) * fix dotnet#104803 * PollWasiEventLoopUntilResolvedVoid * more * wip * CPU-bound work to do * fix exit * Update src/mono/sample/wasi/http-p2/Program.cs Co-authored-by: Larry Ewing <[email protected]> * feedback --------- Co-authored-by: Larry Ewing <[email protected]> commit 61de5df Author: Elinor Fung <[email protected]> Date: Mon Sep 9 17:14:07 2024 -0700 Make DAC and ProfToEEInterfaceImpl stop using BaseDomain (dotnet#107570) `BaseDomain` should no longer be needed now that we only have one `AppDomain` and the `SystemDomain` can be treated as separate. This makes the DAC and ProfToEEInterfaceImpl use `AppDomain` directly and check against `SystemDomain::System()` to determine if a pointer is the system domain. 
commit 76dbb27 Author: Stephen Toub <[email protected]> Date: Mon Sep 9 19:59:54 2024 -0400 Use SearchValues in Uri.CheckForUnicodeOrEscapedUnreserved (dotnet#107357) commit 149d4bb Author: Miha Zupan <[email protected]> Date: Mon Sep 9 16:54:00 2024 -0700 Extend the list of recognized SearchValues<char> field names in Regex (dotnet#107402) commit e591fbf Author: Kunal Pathak <[email protected]> Date: Mon Sep 9 16:38:42 2024 -0700 Arm: Fix the base register used for restoring register from stack (dotnet#107564) * Use correct baseReg for vstr, similar to vldr * add test cases * Mark internal test methods private commit 51c350c Author: Elinor Fung <[email protected]> Date: Mon Sep 9 16:35:02 2024 -0700 Make missing framework error message list other architectures that were found (dotnet#107156) When erroring on a missing framework, check if there are versions of the framework for other architectures and list them for the user. commit 2ed43b6 Author: Alan Hayward <[email protected]> Date: Mon Sep 9 23:53:45 2024 +0100 ARM64-SVE: Allow op inside conditionalselect to be non HWintrinsic (dotnet#107180) * ARM64-SVE: Allow op inside conditionselect to be non HWintrinsic * Add Sve.IsSupported check to test commit ac4b7c6 Author: Kunal Pathak <[email protected]> Date: Mon Sep 9 15:52:00 2024 -0700 Arm: Consider the fact that targetReg can be second half during resolution (dotnet#107493) * Arm: Consider the fact that targetReg can be second half during resolution * add test case * Make sure we only handle float registers * fix test case's public methods commit 18eedbe Author: Aaron Robinson <[email protected]> Date: Mon Sep 9 14:02:51 2024 -0700 Convert Thread FCalls to QCalls (dotnet#107495) * Convert Thread.IsAlive property * Convert Thread.GetCurrentThread() * Convert Thread.ThreadState property * Convert Thread.Initialize() commit d45ccfd Author: Michal Strehovský <[email protected]> Date: Tue Sep 10 05:28:57 2024 +0900 Fix reflection-calling `Set` method on arrays 
(dotnet#107529) The test added in dotnet#106787 found an issue in the implementation of reflection calls to array `Set` methods. We used to throw the wrong exception type. There were probably other corner case bugs (like what exception is thrown when both element type is wrong and index is out of range and when/how value coercion should happen). This should fix that. commit c534080 Author: Tom McDonald <[email protected]> Date: Mon Sep 9 15:21:41 2024 -0400 Avoid using OpenThread for out of process SetThreadContext debugging (dotnet#107511) * Avoid using OpenThread in out of process thread context scenarios * Add comments * Update src/coreclr/debug/di/process.cpp Co-authored-by: mikelle-rogers <[email protected]> * Update src/coreclr/debug/di/process.cpp Co-authored-by: mikelle-rogers <[email protected]> * Update src/coreclr/debug/di/process.cpp Co-authored-by: Noah Falk <[email protected]> --------- Co-authored-by: mikelle-rogers <[email protected]> Co-authored-by: Noah Falk <[email protected]> commit d2c7db0 Author: Tanner Gooding <[email protected]> Date: Mon Sep 9 11:06:45 2024 -0700 Disable TensorExtensionsTwoSpanInFloatOut due to dotnet#107254 (dotnet#107555) commit b7b91cb Author: Aaron Robinson <[email protected]> Date: Mon Sep 9 09:08:31 2024 -0700 Convert some handle APIs to QCalls (dotnet#107513) Convert RuntimeTypeHandle.GetAssembly() Convert RuntimeTypeHandle.GetModule() Convert RuntimeAssembly.GetManifestModule() commit 600f6bd Author: David Wrighton <[email protected]> Date: Mon Sep 9 09:04:51 2024 -0700 Fix thread static cleanup paths (dotnet#107438) * Fix thread static cleanup paths - Do not destroy GC handles while holding the spin lock - Free the pLoaderHandle array when the thread is terminated * When using a ThreadStatics stress test on collectible assemblies, a few more issues were found - Fix issue where the LoaderAllocator's SegmentedHandleIndex wasn't being freed - Fix issue where the logic to re-use TLSIndex values wasn't working properly 
commit fe7a52d Author: Linus Hamlin <[email protected]> Date: Mon Sep 9 17:57:31 2024 +0200 Remove ActiveIssue for solved issues in Vector tests (dotnet#107127) commit 0c33c6f Author: Elinor Fung <[email protected]> Date: Mon Sep 9 08:21:16 2024 -0700 Fix module being set as tenured too early (dotnet#107489) commit 2fb3629 Author: Elinor Fung <[email protected]> Date: Mon Sep 9 08:03:27 2024 -0700 Remove `BaseDomain` use in `LoaderAllocator` and event tracing helpers (dotnet#107481) - Remove `BaseDomain` member on `LoaderAllocator` - Add asserts in functions using `AppDomain` that the loader allocator is collectible and the type is `LAT_Assembly` (so `AssemblyLoaderAllocator` which always had `AppDomain`) - Remove unnecessary `BaseDomain`/`AppDomain` parameters from event tracing helpers - They were always being called with the current app domain commit 62133e0 Author: dotnet-maestro[bot] <42748379+dotnet-maestro[bot]@users.noreply.github.com> Date: Mon Sep 9 16:56:30 2024 +0200 [main] Update dependencies from dotnet/xharness (dotnet#107291) * Update dependencies from https://github.com/dotnet/xharness build 20240902.2 Microsoft.DotNet.XHarness.CLI , Microsoft.DotNet.XHarness.TestRunners.Common , Microsoft.DotNet.XHarness.TestRunners.Xunit From Version 9.0.0-prerelease.24452.1 -> To Version 9.0.0-prerelease.24452.2 * Update dependencies from https://github.com/dotnet/xharness build 20240903.1 Microsoft.DotNet.XHarness.CLI , Microsoft.DotNet.XHarness.TestRunners.Common , Microsoft.DotNet.XHarness.TestRunners.Xunit From Version 9.0.0-prerelease.24452.2 -> To Version 9.0.0-prerelease.24453.1 * Update dependencies from https://github.com/dotnet/xharness build 20240904.2 Microsoft.DotNet.XHarness.CLI , Microsoft.DotNet.XHarness.TestRunners.Common , Microsoft.DotNet.XHarness.TestRunners.Xunit From Version 9.0.0-prerelease.24453.1 -> To Version 10.0.0-prerelease.24454.2 * Update dependencies from https://github.com/dotnet/xharness build 20240906.1 
Microsoft.DotNet.XHarness.CLI , Microsoft.DotNet.XHarness.TestRunners.Common , Microsoft.DotNet.XHarness.TestRunners.Xunit From Version 10.0.0-prerelease.24454.2 -> To Version 10.0.0-prerelease.24456.1 * Update dependencies from https://github.com/dotnet/xharness build 20240909.1 Microsoft.DotNet.XHarness.CLI , Microsoft.DotNet.XHarness.TestRunners.Common , Microsoft.DotNet.XHarness.TestRunners.Xunit From Version 10.0.0-prerelease.24456.1 -> To Version 10.0.0-prerelease.24459.1 --------- Co-authored-by: dotnet-maestro[bot] <dotnet-maestro[bot]@users.noreply.github.com> Co-authored-by: Ilona Tomkowicz <[email protected]> commit 4c0973e Author: Jeremi Kurdek <[email protected]> Date: Mon Sep 9 17:53:45 2024 +0300 Fix passing assemblies using relative path (dotnet#107536) commit 67e5768 Author: Katelyn Gadd <[email protected]> Date: Mon Sep 9 06:19:10 2024 -0700 [wasm] Implement MINT_NEWARR in jiterpreter (dotnet#107430) commit 176754d Author: Matous Kozak <[email protected]> Date: Mon Sep 9 13:35:01 2024 +0200 [mono][infra] decrease CPU count for fullAOT CI build (dotnet#107531) commit 49bf719 Author: Pavel Savara <[email protected]> Date: Mon Sep 9 12:30:47 2024 +0200 [browser][MT] fix feature detection on webworker (dotnet#107452) commit aa418fc Author: Preeyan Parmar <[email protected]> Date: Sun Sep 8 22:44:27 2024 +0100 Remove unused declarations from clsload.hpp (dotnet#107509) * Remove unused declarations from clsload.hpp * also remove unused ClassLoader::TryEnsureLoaded commit 7d68c7f Author: Steve <[email protected]> Date: Mon Sep 9 06:36:18 2024 +0900 Implement getClassAssemblyName (dotnet#106959) * Add getClassAssemblyName * Handle nullptrs * Remove CORINFO_ASSEMBLY_HANDLE * Address feedbacks Co-authored-by: Jan Kotas <[email protected]> commit 39c84a3 Author: Jan Kotas <[email protected]> Date: Sun Sep 8 11:24:13 2024 -0700 Fix corner-case accounting bug in new codeheap allocation (dotnet#107492) The size of internal CodeHeap structures was not included 
in codeheap memory reservation. It caused false OOM exception to be thrown when JITed method code size was near 64kB multiple commit 10f6c4c Author: Aaron Robinson <[email protected]> Date: Sun Sep 8 11:02:41 2024 -0700 Convert WaitHandle FCalls to QCalls (dotnet#107488) commit b523ec5 Author: Aman Khalid <[email protected]> Date: Sun Sep 8 14:42:04 2024 +0000 JIT: Simplify block insertion logic during loop canonicalization (dotnet#107371)
No functional change, just makes the generated source prettier.
For the Regex_RealWorldPatterns.json, this accounts for the most commonly used values. After the change: