diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index ce09848b867cf..b9b6e791ed572 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -64,7 +64,6 @@ internal abstract class RegexCompiler private static readonly MethodInfo s_spanSliceIntIntMethod = typeof(ReadOnlySpan).GetMethod("Slice", new Type[] { typeof(int), typeof(int) })!; private static readonly MethodInfo s_spanStartsWith = typeof(MemoryExtensions).GetMethod("StartsWith", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) })!.MakeGenericMethod(typeof(char)); private static readonly MethodInfo s_stringAsSpanMethod = typeof(MemoryExtensions).GetMethod("AsSpan", new Type[] { typeof(string) })!; - private static readonly MethodInfo s_spanLastIndexOfMethod = typeof(MemoryExtensions).GetMethod("LastIndexOf", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) })!.MakeGenericMethod(typeof(char)); private static readonly MethodInfo s_stringAsSpanIntIntMethod = typeof(MemoryExtensions).GetMethod("AsSpan", new Type[] { typeof(string), typeof(int), typeof(int) })!; private static readonly MethodInfo s_stringGetCharsMethod = typeof(string).GetMethod("get_Chars", new Type[] { typeof(int) })!; private static readonly MethodInfo s_stringIndexOfCharInt = typeof(string).GetMethod("IndexOf", new Type[] { typeof(char), typeof(int) })!; @@ -91,7 +90,6 @@ internal abstract class RegexCompiler private LocalBuilder? _runstackLocal; private LocalBuilder? _textInfoLocal; // cached to avoid extraneous TLS hits from CurrentCulture and virtual calls to TextInfo private LocalBuilder? _loopTimeoutCounterLocal; // timeout counter for setrep and setloop - private LocalBuilder? _maxBacktrackPositionLocal; protected RegexOptions _options; // options protected RegexCode? _code; // the RegexCode object @@ -893,8 +891,6 @@ private void GenerateForwardSection() Mvfldloc(s_runtrackposField, _runtrackposLocal!); Mvfldloc(s_runstackField, _runstackLocal!); Mvfldloc(s_runstackposField, _runstackposLocal!); - Ldc(-1); - Stloc(_maxBacktrackPositionLocal!); _backpos = -1; @@ -1709,7 +1705,7 @@ protected void GenerateFindFirstChar() // if (!CharInClass(textSpan[i + 2], prefix[2], "...")) goto returnFalse; // ... Debug.Assert(charClassIndex == 0 || charClassIndex == 1); - for (; charClassIndex < _leadingCharClasses.Length; charClassIndex++) + for ( ; charClassIndex < _leadingCharClasses.Length; charClassIndex++) { Debug.Assert(needLoop); Ldloca(textSpanLocal); @@ -3314,7 +3310,6 @@ protected void GenerateGo() } _runtextbegLocal = DeclareInt32(); _runtextendLocal = DeclareInt32(); - _maxBacktrackPositionLocal = DeclareInt32(); InitializeCultureForGoIfNecessary(); @@ -4263,61 +4258,7 @@ private void GenerateOneCode() //: break Backward; { string str = _strings![Operand(0)]; - Label multiCode = DefineLabel(); - if (!IsRightToLeft()) - { - // if (runtextend - runtextpos < c) - Ldloc(_runtextendLocal!); - Ldloc(_runtextposLocal!); - Sub(); - Ldc(str.Length); - BgeFar(multiCode); - // if (!caseInsensitive && _maxBacktrackPosition != -1 && runtextpos > _maxBacktrackPosition) - if (!IsCaseInsensitive()) - { - Ldloc(_maxBacktrackPositionLocal!); - Ldc(-1); - BeqFar(_backtrack); - Ldloc(_runtextposLocal!); - Ldloc(_maxBacktrackPositionLocal!); - BleFar(_backtrack); - // runtextpos = _maxBacktrackPosition; - Ldloc(_maxBacktrackPositionLocal!); - Stloc(_runtextposLocal!); - // ReadOnlySpan runtextSpan = runtext.AsSpan(_maxBacktrackPosition, runtextend - _maxBacktractPosition); - Ldloc(_runtextLocal!); - Ldloc(_maxBacktrackPositionLocal!); - Ldloc(_runtextendLocal!); - Ldloc(_maxBacktrackPositionLocal!); - Sub(); - using (RentedLocalBuilder runtextSpanLocal = RentReadOnlySpanCharLocal()) - { - Call(s_stringAsSpanIntIntMethod); - Stloc(runtextSpanLocal); - using (RentedLocalBuilder lastIndexOfLocal = RentInt32Local()) - { - // int lastIndexOf = runtextSpan.LastIndexOf(str.AsSpan()); - Ldloc(runtextSpanLocal); - Ldstr(str); - Call(s_stringAsSpanMethod); - Call(s_spanLastIndexOfMethod); - Stloc(lastIndexOfLocal); - // if (lastIndexOf > -1) - Ldloc(lastIndexOfLocal); - Ldc(-1); - BleFar(_backtrack); - // runtextpos = lastIndexOf + _maxBacktrackPosition; - Ldloc(lastIndexOfLocal); - Ldloc(_maxBacktrackPositionLocal!); - Add(); - Stloc(_runtextposLocal!); - BrFar(_backtrack); - } - } - } - } - MarkLabel(multiCode); Ldc(str.Length); Ldloc(_runtextendLocal!); Ldloc(_runtextposLocal!); @@ -4657,9 +4598,6 @@ private void GenerateOneCode() using RentedLocalBuilder lenLocal = RentInt32Local(); using RentedLocalBuilder iLocal = RentInt32Local(); - using RentedLocalBuilder tempMaxBacktrackPositionLocal = RentInt32Local(); - Ldloc(_runtextposLocal!); - Stloc(tempMaxBacktrackPositionLocal); if (!IsRightToLeft()) { @@ -4909,12 +4847,6 @@ private void GenerateOneCode() DoPush(); Track(); - // if (_operator == RegexCode.Notoneloop) maxBacktrackPosition = tempMaxBacktrackPosition - if (_regexopcode == RegexCode.Notoneloop) - { - Ldloc(tempMaxBacktrackPositionLocal); - Stloc(_maxBacktrackPositionLocal!); - } } break; } @@ -4938,66 +4870,28 @@ private void GenerateOneCode() //: if (i > 0) //: Track(i - 1, pos - 1); //: Advance(2); - Label noBacktrackPositionBranch = DefineLabel(); + PopTrack(); + Stloc(_runtextposLocal!); PopTrack(); using (RentedLocalBuilder posLocal = RentInt32Local()) { Stloc(posLocal); - PopTrack(); - using (RentedLocalBuilder iBacktrackLocal = RentInt32Local()) - { - Stloc(iBacktrackLocal); - // if (!caseInsensitive && maxBacktrackPosition != -1 && pos > maxBacktrackPosition && runtextpos < pos && _operator == (RegexCode.Notoneloop | RegexCode.Back) && !_rightToLeft) - if (!IsCaseInsensitive() && _regexopcode == (RegexCode.Notoneloop | RegexCode.Back) && !IsRightToLeft()) - { - Ldloc(_maxBacktrackPositionLocal!); - Ldc(-1); - Beq(noBacktrackPositionBranch); - Ldloc(posLocal); - Ldloc(_maxBacktrackPositionLocal!); - Ble(noBacktrackPositionBranch); - Ldloc(_runtextposLocal!); - Ldloc(posLocal); - Bge(noBacktrackPositionBranch); - /* - int difference = pos - maxBacktrackPosition; - pos = runtextpos; - i -= difference; - maxBacktrackPosition = -1; - */ - // int difference = pos - maxBacktrackPosition; - Ldloc(iBacktrackLocal); - Ldloc(posLocal); - Ldloc(_maxBacktrackPositionLocal!); - Sub(); - Sub(); - Stloc(iBacktrackLocal); - Ldloc(_runtextposLocal!); - Stloc(posLocal); - Ldc(-1); - Stloc(_maxBacktrackPositionLocal!); - } - - MarkLabel(noBacktrackPositionBranch); - Ldloc(posLocal); - Stloc(_runtextposLocal!); - Ldloc(iBacktrackLocal); - Ldc(0); - BleFar(AdvanceLabel()); - ReadyPushTrack(); - Ldloc(iBacktrackLocal); - } - Ldc(1); - Sub(); - DoPush(); + Ldloc(posLocal); + Ldc(0); + BleFar(AdvanceLabel()); ReadyPushTrack(); - Ldloc(_runtextposLocal!); - Ldc(1); - Sub(IsRightToLeft()); - DoPush(); - Trackagain(); - Advance(); + Ldloc(posLocal); } + Ldc(1); + Sub(); + DoPush(); + ReadyPushTrack(); + Ldloc(_runtextposLocal!); + Ldc(1); + Sub(IsRightToLeft()); + DoPush(); + Trackagain(); + Advance(); break; case RegexCode.Onelazy: diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs index 3e05926733fe9..d557ec6c3aa73 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs @@ -20,7 +20,6 @@ internal sealed class RegexInterpreter : RegexRunner private int _codepos; private bool _rightToLeft; private bool _caseInsensitive; - private int _maxBacktrackPosition = -1; public RegexInterpreter(RegexCode code, CultureInfo culture) { @@ -224,20 +223,6 @@ private bool MatchString(string str) { if (runtextend - runtextpos < c) { - // If MatchString was called after a greedy op such as a .*, we would have zipped runtextpos to the end without really examining any characters. Reset to maxBacktrackPos here as an optimization - if (!_caseInsensitive && _maxBacktrackPosition != -1 && runtextpos > _maxBacktrackPosition) - { - // If lastIndexOf is -1, we backtrack to the max extent possible. - runtextpos = _maxBacktrackPosition; - ReadOnlySpan runtextSpan = runtext.AsSpan(_maxBacktrackPosition, runtextend - _maxBacktrackPosition); - int lastIndexOf = runtextSpan.LastIndexOf(str); - if (lastIndexOf > -1) - { - // Found the next position to match. Move runtextpos here - runtextpos = _maxBacktrackPosition + lastIndexOf; - } - } - return false; } @@ -1200,7 +1185,6 @@ protected override void Go() int len = Math.Min(Operand(1), Forwardchars()); char ch = (char)Operand(0); int i; - int tempMaxBacktrackPosition = runtextpos; if (!_rightToLeft && !_caseInsensitive) { @@ -1233,7 +1217,6 @@ protected override void Go() if (len > i && _operator == RegexCode.Notoneloop) { TrackPush(len - i - 1, runtextpos - Bump()); - _maxBacktrackPosition = tempMaxBacktrackPosition; } } advance = 2; @@ -1278,16 +1261,6 @@ protected override void Go() { int i = TrackPeek(); int pos = TrackPeek(1); - if (!_caseInsensitive && _maxBacktrackPosition != -1 && pos > _maxBacktrackPosition && runtextpos < pos && _operator == (RegexCode.Notoneloop | RegexCode.Back) && !_rightToLeft) - { - // The Multi node has bumped us along already - int difference = pos - _maxBacktrackPosition; - Debug.Assert(difference > 0); - pos = runtextpos; - i -= difference; - // We shouldn't be backtracking anymore. - _maxBacktrackPosition = -1; - } runtextpos = pos; if (i > 0) { diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs index 723fc68e53bab..719f280c91726 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs @@ -5,7 +5,6 @@ using System.Diagnostics; using System.Globalization; using System.Linq; -using System.Reflection; using System.Tests; using Microsoft.DotNet.RemoteExecutor; using Xunit; @@ -205,11 +204,9 @@ public static IEnumerable Match_Basic_TestData() yield return new object[] { "abc", "abc", RegexOptions.None, 0, 3, true, "abc" }; yield return new object[] { "abc", "aBc", RegexOptions.None, 0, 3, false, string.Empty }; yield return new object[] { "abc", "aBc", RegexOptions.IgnoreCase, 0, 3, true, "aBc" }; - yield return new object[] { @"abc.*def", "abcghiDEF", RegexOptions.IgnoreCase, 0, 9, true, "abcghiDEF" }; // Using *, +, ?, {}: Actual - "a+\\.?b*\\.?c{2}" yield return new object[] { @"a+\.?b*\.+c{2}", "ab.cc", RegexOptions.None, 0, 5, true, "ab.cc" }; - yield return new object[] { @"[^a]+\.[^z]+", "zzzzz", RegexOptions.None, 0, 5, false, string.Empty }; // RightToLeft yield return new object[] { @"\s+\d+", "sdf 12sad", RegexOptions.RightToLeft, 0, 9, true, " 12" }; @@ -384,62 +381,6 @@ public static IEnumerable Match_Basic_TestData() yield return new object[] { "\u05D0(?:\u05D1|\u05D2|\u05D3)", "\u05D0\u05D2", options, 0, 2, true, "\u05D0\u05D2" }; yield return new object[] { "\u05D0(?:\u05D1|\u05D2|\u05D3)", "\u05D0\u05D4", options, 0, 0, false, "" }; } - - // .* Perf Optimization: Case sensitive - foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.Compiled }) - { - yield return new object[] { @".*\nfoo", "This shouldn't match", options, 0, 20, false, "" }; - yield return new object[] { @"a.*\nfoo", "This shouldn't match", options, 0, 20, false, "" }; - yield return new object[] { @".*\nFoo", $"\nFooThis should match", options, 0, 21, true, "\nFoo" }; - yield return new object[] { @".*\nfoo", "\nfooThis should match", options, 4, 17, false, "" }; - - yield return new object[] { @".*\dfoo", "This shouldn't match", options, 0, 20, false, "" }; - yield return new object[] { @".*\dFoo", "This1Foo should match", options, 0, 21, true, "This1Foo" }; - yield return new object[] { @".*\dFoo", "This1foo should 2Foo match", options, 0, 26, true, "This1foo should 2Foo" }; - yield return new object[] { @".*\dFoo", "This1foo shouldn't 2foo match", options, 0, 29, false, "" }; - yield return new object[] { @".*\dfoo", "This1foo shouldn't 2foo match", options, 24, 5, false, "" }; - - yield return new object[] { @".*\dfoo", "1fooThis1foo should 1foo match", options, 4, 9, true, "This1foo" }; - yield return new object[] { @".*\dfoo", "This shouldn't match 1foo", options, 0, 20, false, "" }; - } - - // .* Perf Optimization: Case insensitive - foreach (RegexOptions options in new[] { RegexOptions.IgnoreCase, RegexOptions.IgnoreCase | RegexOptions.Compiled }) - { - yield return new object[] { @".*\nFoo", "\nfooThis should match", options, 0, 21, true, "\nfoo" }; - yield return new object[] { @".*\dFoo", "This1foo should match", options, 0, 21, true, "This1foo" }; - yield return new object[] { @".*\dFoo", "This1foo should 2FoO match", options, 0, 26, true, "This1foo should 2FoO" }; - yield return new object[] { @".*\dFoo", "This1Foo should 2fOo match", options, 0, 26, true, "This1Foo should 2fOo" }; - yield return new object[] { @".*\dfoo", "1fooThis1FOO should 1foo match", options, 4, 9, true, "This1FOO" }; - } - - // .* Perf Optimization: RTL, Case-sensitive - foreach (RegexOptions options in new[] { RegexOptions.None | RegexOptions.RightToLeft, RegexOptions.Compiled | RegexOptions.RightToLeft }) - { - yield return new object[] { @".*\nfoo", "This shouldn't match", options, 0, 20, false, "" }; - yield return new object[] { @"a.*\nfoo", "This shouldn't match", options, 0, 20, false, "" }; - yield return new object[] { @".*\nFoo", $"This should match\nFoo", options, 0, 21, true, "This should match\nFoo" }; - yield return new object[] { @".*\nfoo", "This should matchfoo\n", options, 4, 13, false, "" }; - - yield return new object[] { @".*\dfoo", "This shouldn't match", options, 0, 20, false, "" }; - yield return new object[] { @".*\dFoo", "This1Foo should match", options, 0, 21, true, "This1Foo" }; - yield return new object[] { @".*\dFoo", "This1foo should 2Foo match", options, 0, 26, true, "This1foo should 2Foo" }; - yield return new object[] { @".*\dFoo", "This1foo shouldn't 2foo match", options, 0, 29, false, "" }; - yield return new object[] { @".*\dfoo", "This1foo shouldn't 2foo match", options, 19, 0, false, "" }; - - yield return new object[] { @".*\dfoo", "1fooThis2foo should 1foo match", options, 8, 4, true, "2foo" }; - yield return new object[] { @".*\dfoo", "This shouldn't match 1foo", options, 0, 20, false, "" }; - } - - // .* Perf Optimization: RTL, case insensitive - foreach (RegexOptions options in new[] { RegexOptions.IgnoreCase | RegexOptions.RightToLeft, RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.RightToLeft }) - { - yield return new object[] { @".*\nFoo", "\nfooThis should match", options, 0, 21, true, "\nfoo" }; - yield return new object[] { @".*\dFoo", "This1foo should match", options, 0, 21, true, "This1foo" }; - yield return new object[] { @".*\dFoo", "This1foo should 2FoO match", options, 0, 26, true, "This1foo should 2FoO" }; - yield return new object[] { @".*\dFoo", "This1Foo should 2fOo match", options, 0, 26, true, "This1Foo should 2fOo" }; - yield return new object[] { @".*\dfoo", "1fooThis2FOO should 1foo match", options, 8, 4, true, "2FOO" }; - } } public static IEnumerable Match_Basic_TestData_NetCore() @@ -676,7 +617,7 @@ public void Match_Timeout_Loop_Throws(string pattern, RegexOptions options) public void Match_Timeout_Repetition_Throws(RegexOptions options) { int repetitionCount = 800_000_000; - var regex = new Regex(@"a\s{" + repetitionCount + "}", options, TimeSpan.FromSeconds(1)); + var regex = new Regex(@"a\s{" + repetitionCount+ "}", options, TimeSpan.FromSeconds(1)); string input = @"a" + new string(' ', repetitionCount) + @"b"; Assert.Throws(() => regex.Match(input)); }