From 8ae3796ef109962725833610c26e3f0bb06e2988 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Tue, 17 Sep 2024 12:49:53 -0400 Subject: [PATCH] Fix RegexOptions.Compiled|IgnoreCase perf when dynamic code isn't supported (#107874) If a regex is created with RegexOptions.Compiled and RegexOptions.IgnoreCase, and it begins with a pattern that's a reasonably small number of alternating strings, it'll now end up using `SearchValues<string>` to find the next possible match location. However, when RegexOptions.Compiled is specified, the `SearchValues<string>` instance isn't created up front, even if the interpreter ends up being used. If the implementation falls back to the interpreter because compilation is unavailable (dynamic code isn't supported), then it won't use any optimizations to find the next starting location. That's a regression from when it would previously at least use a vectorized search to find one character class from the set of starting strings. This fixes it to just always create the `SearchValues<string>`. This adds some overhead when using RegexOptions.Compiled, but it's typically just a few percentage points, and only applies in the cases where this `SearchValues<string>` optimization kicks in. At the moment, changing it to have perfect knowledge about whether it can avoid that creation is too invasive. This overhead also doesn't apply to the source generator. 
--- .../RegexFindOptimizations.cs | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs index 1318769136235..1291dff977e9f 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs @@ -84,7 +84,6 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options) bool dfa = (options & RegexOptions.NonBacktracking) != 0; bool compiled = (options & RegexOptions.Compiled) != 0 && !dfa; // for now, we never generate code for NonBacktracking, so treat it as non-compiled bool interpreter = !compiled && !dfa; - bool usesRfoTryFind = !compiled; // For interpreter, we want to employ optimizations, but we don't want to make construction significantly // more expensive; someone who wants to pay to do more work can specify Compiled. 
So for the interpreter @@ -149,10 +148,7 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options) LeadingPrefixes = caseInsensitivePrefixes; FindMode = FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight; #if SYSTEM_TEXT_REGULAREXPRESSIONS - if (usesRfoTryFind) - { - LeadingStrings = SearchValues.Create(LeadingPrefixes, StringComparison.OrdinalIgnoreCase); - } + LeadingStrings = SearchValues.Create(LeadingPrefixes, StringComparison.OrdinalIgnoreCase); #endif return; } @@ -165,10 +161,7 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options) // LeadingPrefixes = caseSensitivePrefixes; // FindMode = FindNextStartingPositionMode.LeadingStrings_LeftToRight; #if SYSTEM_TEXT_REGULAREXPRESSIONS - // if (usesRfoTryFind) - // { - // LeadingStrings = SearchValues.Create(LeadingPrefixes, StringComparison.Ordinal); - // } + // LeadingStrings = SearchValues.Create(LeadingPrefixes, StringComparison.Ordinal); #endif // return; //} @@ -699,14 +692,9 @@ public bool TryFindNextStartingPositionLeftToRight(ReadOnlySpan textSpan, case FindNextStartingPositionMode.LeadingStrings_LeftToRight: case FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight: { - if (LeadingStrings is not SearchValues searchValues) - { - // This should be exceedingly rare and only happen if a Compiled regex selected this - // option but then failed to compile (e.g. due to too deep stacks) and fell back to the interpreter. - return true; - } + Debug.Assert(LeadingStrings is not null); - int i = textSpan.Slice(pos).IndexOfAny(searchValues); + int i = textSpan.Slice(pos).IndexOfAny(LeadingStrings); if (i >= 0) { pos += i;