Skip to content

Commit

Permalink
Addressing some feedback and implementing Count(span) on top of Enume…
Browse files Browse the repository at this point in the history
…rateMatches and cleaning up some code.
  • Loading branch information
joperezr committed Apr 9, 2022
1 parent b838c33 commit aba9a54
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 133 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,10 @@ public int Count(ReadOnlySpan<char> input)
{
int count = 0;

RunAllMatchesWithCallback(input, 0, ref count, static (ref int count, Match match) =>
foreach (ValueMatch _ in EnumerateMatches(input))
{
count++;
return true;
}, reuseMatchObject: true);
}

return count;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;

namespace System.Text.RegularExpressions
Expand Down Expand Up @@ -85,10 +86,9 @@ public ref struct ValueMatchEnumerator
{
private readonly Regex _regex;
private readonly ReadOnlySpan<char> _input;
private ValueMatch _matchReference;
private ValueMatch _current;
private int _startAt;
private int _prevLen;
private bool _hasValidValue;

/// <summary>
/// Creates an instance of the <see cref="ValueMatchEnumerator"/> for the passed in <paramref name="regex"/> which iterates over <paramref name="input"/>.
Expand All @@ -100,10 +100,9 @@ internal ValueMatchEnumerator(Regex regex, ReadOnlySpan<char> input, int startAt
{
_regex = regex;
_input = input;
_matchReference = default;
_current = default;
_startAt = startAt;
_prevLen = -1;
_hasValidValue = false;
}

/// <summary>
Expand All @@ -121,24 +120,22 @@ internal ValueMatchEnumerator(Regex regex, ReadOnlySpan<char> input, int startAt
public bool MoveNext()
{
Match? match = _regex.RunSingleMatch(quick: false, _prevLen, _input, _startAt);
if (match is not null && match != RegularExpressions.Match.Empty)
Debug.Assert(match != null, "Match shouldn't be null because we passed quick = false.");
if (match != RegularExpressions.Match.Empty)
{
_matchReference = new ValueMatch(match);
_hasValidValue = true;
_current = new ValueMatch(match.Index, match.Length);
_startAt = match._textpos;
_prevLen = match.Length;
return true;
}
_hasValidValue = false;
_matchReference = default;
return false;
}

/// <summary>
/// Gets the <see cref="ValueMatch"/> element at the current position of the enumerator.
/// </summary>
/// <exception cref="InvalidOperationException">Enumeration has either not started or has already finished.</exception>
public readonly ValueMatch Current => _hasValidValue ? _matchReference : throw new InvalidOperationException(SR.EnumNotStarted);
public readonly ValueMatch Current => _current;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -458,116 +458,88 @@ internal void RunAllMatchesWithCallback<TState>(string input, int startat, ref T
RegexRunner runner = Interlocked.Exchange(ref _runner, null) ?? CreateRunner();
try
{
// For the string overload, we need to set runtext before starting the match attempts.
// We need to set runtext before starting the match attempts.
runner.runtext = input;
RunAllMatchesWithCallbackHelper(input, startat, ref state, callback, runner, usingStringOverload: true, reuseMatchObject);
}
finally
{
runner.runtext = null; // drop reference to text to avoid keeping it alive in a cache.
_runner = runner;
}
}

/// <summary>Internal worker which will scan the passed in string <paramref name="input"/> for all matches, and will call <paramref name="callback"/> for each match found.</summary>
internal void RunAllMatchesWithCallback<TState>(ReadOnlySpan<char> input, int startat, ref TState state, MatchCallback<TState> callback, bool reuseMatchObject)
{
Debug.Assert((uint)startat <= (uint)input.Length);

RegexRunner runner = Interlocked.Exchange(ref _runner, null) ?? CreateRunner();
try
{
RunAllMatchesWithCallbackHelper(input, startat, ref state, callback, runner, usingStringOverload: false, reuseMatchObject);
}
finally
{
_runner = runner;
}
}

/// <summary>
/// Helper method used by <see cref="RunAllMatchesWithCallback{TState}(string, int, ref TState, MatchCallback{TState}, bool)"/> and
/// <see cref="RunAllMatchesWithCallback{TState}(ReadOnlySpan{char}, int, ref TState, MatchCallback{TState}, bool)"/> which loops to find
/// all matches on the passed in <paramref name="input"/> and calls <paramref name="callback"/> for each match found.
/// </summary>
private void RunAllMatchesWithCallbackHelper<TState>(ReadOnlySpan<char> input, int startat, ref TState state, MatchCallback<TState> callback, RegexRunner runner, bool usingStringOverload, bool reuseMatchObject)
{
runner.InitializeTimeout(internalMatchTimeout);
int runtextpos = startat;
while (true)
{
runner.InitializeForScan(this, input, startat, false);
runner.runtextpos = runtextpos;

int stoppos = RightToLeft ? 0 : input.Length;
runner.InitializeTimeout(internalMatchTimeout);
int runtextpos = startat;
while (true)
{
runner.InitializeForScan(this, input, startat, false);
runner.runtextpos = runtextpos;

// We get the Match by calling Scan. 'input' parameter is used to set the Match text which is only relevante if we are using the Run<TState> string
// overload, as APIs that call the span overload (like Count) don't require match.Text to be set, so we pass null in that case.
Match? match = ScanInternal(reuseMatchObject, input: usingStringOverload ? runner.runtext : null, 0, runner, input, returnNullIfQuick: false);
Debug.Assert(match is not null);
int stoppos = RightToLeft ? 0 : input.Length;

// if we got a match, then call the callback function with the match and prepare for next iteration.
if (match.Success)
{
if (!reuseMatchObject)
{
// We're not reusing match objects, so null out our field reference to the instance.
// It'll be recreated the next time one is needed.
runner.runmatch = null;
}
// We get the Match by calling Scan. 'input' parameter is used to set the Match text.
Match? match = ScanInternal(reuseMatchObject, runner.runtext, 0, runner, input, returnNullIfQuick: false);
Debug.Assert(match is not null);

if (!callback(ref state, match))
// if we got a match, then call the callback function with the match and prepare for next iteration.
if (match.Success)
{
// If the callback returns false, we're done.
if (!reuseMatchObject)
{
// We're not reusing match objects, so null out our field reference to the instance.
// It'll be recreated the next time one is needed.
runner.runmatch = null;
}

if (usingStringOverload && reuseMatchObject)
if (!callback(ref state, match))
{
// We're reusing the single match instance and we were called via the string overload
// which would have set the match's text, so clear it out as well.
// We don't do this if we're not reusing instances, as in that case we're
// dropping the whole reference to the match, and we no longer own the instance
// having handed it out to the callback.
match.Text = null;
// If the callback returns false, we're done.

if (reuseMatchObject)
{
// We're reusing the single match instance so we clear out match.Text which was set above.
// We don't do this if we're not reusing instances, as in that case we're
// dropping the whole reference to the match, and we no longer own the instance
// having handed it out to the callback.
match.Text = null;
}
return;
}
return;
}

// Now that we've matched successfully, update the starting position to reflect
// the current position, just as Match.NextMatch() would pass in _textpos as textstart.
runtextpos = startat = runner.runtextpos;
// Now that we've matched successfully, update the starting position to reflect
// the current position, just as Match.NextMatch() would pass in _textpos as textstart.
runtextpos = startat = runner.runtextpos;

// Reset state for another iteration.
runner.runtrackpos = runner.runtrack!.Length;
runner.runstackpos = runner.runstack!.Length;
runner.runcrawlpos = runner.runcrawl!.Length;
// Reset state for another iteration.
runner.runtrackpos = runner.runtrack!.Length;
runner.runstackpos = runner.runstack!.Length;
runner.runcrawlpos = runner.runcrawl!.Length;

if (match.Length == 0)
{
if (runner.runtextpos == stoppos)
if (match.Length == 0)
{
if (usingStringOverload && reuseMatchObject)
if (runner.runtextpos == stoppos)
{
// See above comment.
match.Text = null;
if (reuseMatchObject)
{
// See above comment.
match.Text = null;
}
return;
}
return;

runtextpos += RightToLeft ? -1 : 1;
}

runtextpos += RightToLeft ? -1 : 1;
// Loop around to perform next match from where we left off.
continue;
}

// Loop around to perform next match from where we left off.
continue;
}
else
{
// We failed to match at this position. If we're at the stopping point, we're done.
if (runner.runtextpos == stoppos)
else
{
return;
// We failed to match at this position. If we're at the stopping point, we're done.
if (runner.runtextpos == stoppos)
{
return;
}
}
}
}
finally
{
runner.runtext = null; // drop reference to text to avoid keeping it alive in a cache.
_runner = runner;
}
}

/// <summary>Helper method used by RunSingleMatch and RunAllMatchesWithCallback which calls runner.Scan to find a match on the passed in span.</summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,14 @@ public readonly ref struct ValueMatch
private readonly int _length;

/// <summary>
/// Crates an instance of the <see cref="ValueMatch"/> type based on the passed in <paramref name="match"/>.
/// Crates an instance of the <see cref="ValueMatch"/> type based on the passed in <paramref name="index"/> and <paramref name="length"/>.
/// </summary>
/// <param name="match">The <see cref="Match"/> object represented by this ValueMatch.</param>
internal ValueMatch(Match match)
/// <param name="index">The position in the original span where the first character of the captured sliced span is found.</param>
/// <param name="length">The length of the captured sliced span.</param>
internal ValueMatch(int index, int length)
{
_index = match.Index;
_length = match.Length;
_index = index;
_length = length;
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,32 +57,6 @@ public void Enumerate_No_Match(RegexEngine engine)

}

[Theory]
[MemberData(nameof(NoneCompiledBacktracking))]
public static void EnumerateMatches_Invalid(RegexOptions options)
{
Regex regex = new Regex("e", options);
Regex.ValueMatchEnumerator enumerator = regex.EnumerateMatches("dotnet");

Assert.True(ThrowsInvalidOperationException(ref enumerator));

while (enumerator.MoveNext()) ;
Assert.True(ThrowsInvalidOperationException(ref enumerator));

bool ThrowsInvalidOperationException(ref Regex.ValueMatchEnumerator enumerator)
{
try
{
_ = enumerator.Current;
}
catch (InvalidOperationException)
{
return true;
}
return false;
}
}

[Theory]
[MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))]
public void EnumerateMatches_Lookahead(RegexEngine engine)
Expand Down

0 comments on commit aba9a54

Please sign in to comment.