Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Regex.EnumerateMatches #67794

Merged
merged 4 commits into from
Apr 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ public static void CompileToAssembly(System.Text.RegularExpressions.RegexCompila
public static int Count(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options) { throw null; }
public static int Count(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options, System.TimeSpan matchTimeout) { throw null; }
public static string Escape(string str) { throw null; }
public System.Text.RegularExpressions.Regex.ValueMatchEnumerator EnumerateMatches(System.ReadOnlySpan<char> input) { throw null; }
public static System.Text.RegularExpressions.Regex.ValueMatchEnumerator EnumerateMatches(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute("Regex")] string pattern) { throw null; }
public static System.Text.RegularExpressions.Regex.ValueMatchEnumerator EnumerateMatches(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute("Regex", new object[]{ "options"})] string pattern, System.Text.RegularExpressions.RegexOptions options) { throw null; }
public static System.Text.RegularExpressions.Regex.ValueMatchEnumerator EnumerateMatches(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute("Regex", new object[]{ "options"})] string pattern, System.Text.RegularExpressions.RegexOptions options, System.TimeSpan matchTimeout) { throw null; }
public string[] GetGroupNames() { throw null; }
public int[] GetGroupNumbers() { throw null; }
public string GroupNameFromNumber(int i) { throw null; }
Expand Down Expand Up @@ -220,6 +224,14 @@ void System.Runtime.Serialization.ISerializable.GetObjectData(System.Runtime.Ser
protected bool UseOptionC() { throw null; }
protected internal bool UseOptionR() { throw null; }
protected internal static void ValidateMatchTimeout(System.TimeSpan matchTimeout) { }
public ref partial struct ValueMatchEnumerator
{
private object _dummy;
private int _dummyPrimitive;
public readonly System.Text.RegularExpressions.ValueMatch Current { get { throw null; } }
public readonly System.Text.RegularExpressions.Regex.ValueMatchEnumerator GetEnumerator() { throw null; }
public bool MoveNext() { throw null; }
}
}
[System.ObsoleteAttribute("Regex.CompileToAssembly is obsolete and not supported. Use the RegexGeneratorAttribute with the regular expression source generator instead.", DiagnosticId = "SYSLIB0036", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
public partial class RegexCompilationInfo
Expand Down Expand Up @@ -359,4 +371,10 @@ public abstract partial class RegexRunnerFactory
protected RegexRunnerFactory() { }
protected internal abstract System.Text.RegularExpressions.RegexRunner CreateInstance();
}
public readonly ref partial struct ValueMatch
{
private readonly int _dummyPrimitive;
public int Index { get { throw null; } }
public int Length { get { throw null; } }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
<ItemGroup>
<Compile Include="System\Collections\HashtableExtensions.cs" />
<Compile Include="System\Collections\Generic\ValueListBuilder.Pop.cs" />
<Compile Include="System\Text\RegularExpressions\ValueMatch.cs" />
<Compile Include="System\Threading\StackHelper.cs" />
<Compile Include="System\Text\SegmentStringBuilder.cs" />
<Compile Include="System\Text\RegularExpressions\Capture.cs" />
Expand All @@ -23,6 +24,7 @@
<Compile Include="System\Text\RegularExpressions\Regex.Match.cs" />
<Compile Include="System\Text\RegularExpressions\Regex.Replace.cs" />
<Compile Include="System\Text\RegularExpressions\Regex.Split.cs" />
<Compile Include="System\Text\RegularExpressions\Regex.EnumerateMatches.cs" />
<Compile Include="System\Text\RegularExpressions\Regex.Timeout.cs" />
<Compile Include="System\Text\RegularExpressions\RegexCaseBehavior.cs" />
<Compile Include="System\Text\RegularExpressions\RegexCaseEquivalences.Data.cs" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;

namespace System.Text.RegularExpressions
{
public partial class Regex
{
/// <summary>
/// Searches an input span for all occurrences of a regular expression and returns a <see cref="ValueMatchEnumerator"/> to iterate over the matches.
/// </summary>
/// <remarks>
/// Each match won't actually happen until <see cref="ValueMatchEnumerator.MoveNext"/> is invoked on the enumerator, with one match being performed per <see cref="ValueMatchEnumerator.MoveNext"/> call.
/// Since the evaluation of the match happens lazily, any changes to the passed in input in between calls to <see cref="ValueMatchEnumerator.MoveNext"/> will affect the match results.
/// The enumerator returned by this method, as well as the structs returned by the enumerator that wrap each match found in the input are ref structs which
/// make this method be amortized allocation free.
joperezr marked this conversation as resolved.
Show resolved Hide resolved
/// </remarks>
/// <param name="input">The span to search for a match.</param>
/// <param name="pattern">The regular expression pattern to match.</param>
/// <returns>A <see cref="ValueMatchEnumerator"/> to iterate over the matches.</returns>
/// <exception cref="ArgumentNullException"><paramref name="pattern"/> is null.</exception>
/// <exception cref="RegexParseException">A regular expression parsing error occurred.</exception>
public static ValueMatchEnumerator EnumerateMatches(ReadOnlySpan<char> input, [StringSyntax(StringSyntaxAttribute.Regex)] string pattern) =>
RegexCache.GetOrAdd(pattern).EnumerateMatches(input);

/// <summary>
/// Searches an input span for all occurrences of a regular expression and returns a <see cref="ValueMatchEnumerator"/> to iterate over the matches.
/// </summary>
/// <remarks>
/// Each match won't actually happen until <see cref="ValueMatchEnumerator.MoveNext"/> is invoked on the enumerator, with one match being performed per <see cref="ValueMatchEnumerator.MoveNext"/> call.
/// Since the evaluation of the match happens lazily, any changes to the passed in input in between calls to <see cref="ValueMatchEnumerator.MoveNext"/> will affect the match results.
/// The enumerator returned by this method, as well as the structs returned by the enumerator that wrap each match found in the input are ref structs which
/// make this method be amortized allocation free.
/// </remarks>
/// <param name="input">The span to search for a match.</param>
/// <param name="pattern">The regular expression pattern to match.</param>
/// <param name="options">A bitwise combination of the enumeration values that specify options for matching.</param>
/// <returns>A <see cref="ValueMatchEnumerator"/> to iterate over the matches.</returns>
/// <exception cref="ArgumentNullException"><paramref name="pattern"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="options"/> is not a valid bitwise combination of RegexOptions values.</exception>
/// <exception cref="RegexParseException">A regular expression parsing error occurred.</exception>
public static ValueMatchEnumerator EnumerateMatches(ReadOnlySpan<char> input, [StringSyntax(StringSyntaxAttribute.Regex, "options")] string pattern, RegexOptions options) =>
RegexCache.GetOrAdd(pattern, options, s_defaultMatchTimeout).EnumerateMatches(input);

/// <summary>
/// Searches an input span for all occurrences of a regular expression and returns a <see cref="ValueMatchEnumerator"/> to iterate over the matches.
/// </summary>
/// <remarks>
/// Each match won't actually happen until <see cref="ValueMatchEnumerator.MoveNext"/> is invoked on the enumerator, with one match being performed per <see cref="ValueMatchEnumerator.MoveNext"/> call.
/// Since the evaluation of the match happens lazily, any changes to the passed in input in between calls to <see cref="ValueMatchEnumerator.MoveNext"/> will affect the match results.
/// The enumerator returned by this method, as well as the structs returned by the enumerator that wrap each match found in the input are ref structs which
/// make this method be amortized allocation free.
/// </remarks>
/// <param name="input">The span to search for a match.</param>
/// <param name="pattern">The regular expression pattern to match.</param>
/// <param name="options">A bitwise combination of the enumeration values that specify options for matching.</param>
/// <param name="matchTimeout">A time-out interval, or <see cref="InfiniteMatchTimeout"/> to indicate that the method should not time out.</param>
/// <returns>A <see cref="ValueMatchEnumerator"/> to iterate over the matches.</returns>
/// <exception cref="ArgumentNullException"><paramref name="pattern"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="options"/> is not a valid bitwise combination of RegexOptions values, or <paramref name="matchTimeout"/> is negative, zero, or greater than approximately 24 days.</exception>
/// <exception cref="RegexParseException">A regular expression parsing error occurred.</exception>
public static ValueMatchEnumerator EnumerateMatches(ReadOnlySpan<char> input, [StringSyntax(StringSyntaxAttribute.Regex, "options")] string pattern, RegexOptions options, TimeSpan matchTimeout) =>
RegexCache.GetOrAdd(pattern, options, matchTimeout).EnumerateMatches(input);

/// <summary>
/// Searches an input span for all occurrences of a regular expression and returns a <see cref="ValueMatchEnumerator"/> to iterate over the matches.
/// </summary>
/// <remarks>
/// Each match won't actually happen until <see cref="ValueMatchEnumerator.MoveNext"/> is invoked on the enumerator, with one match being performed per <see cref="ValueMatchEnumerator.MoveNext"/> call.
/// Since the evaluation of the match happens lazily, any changes to the passed in input in between calls to <see cref="ValueMatchEnumerator.MoveNext"/> will affect the match results.
/// The enumerator returned by this method, as well as the structs returned by the enumerator that wrap each match found in the input are ref structs which
/// make this method be amortized allocation free.
/// </remarks>
/// <param name="input">The span to search for a match.</param>
/// <returns>A <see cref="ValueMatchEnumerator"/> to iterate over the matches.</returns>
public ValueMatchEnumerator EnumerateMatches(ReadOnlySpan<char> input) =>
new ValueMatchEnumerator(this, input, RightToLeft ? input.Length : 0);

/// <summary>
/// Represents an enumerator containing the set of successful matches found by iteratively applying a regular expression pattern to the input span.
/// </summary>
/// <remarks>
/// The enumerator has no public constructor. The <see cref="Regex.EnumerateMatches(ReadOnlySpan{char})"/> method returns a <see cref="Regex.ValueMatchEnumerator"/>
/// object.The enumerator will lazily iterate over zero or more <see cref="ValueMatch"/> objects. If there is at least one successful match in the span, then
/// <see cref="MoveNext"/> returns <see langword="true"/> and <see cref="Current"/> will contain the first <see cref="ValueMatch"/>. If there are no successful matches,
/// then <see cref="MoveNext"/> returns <see langword="false"/> and <see cref="Current"/> throws an <see cref="InvalidOperationException"/>.
///
/// This type is a ref struct since it stores the input span as a field in order to be able to lazily iterate over it.
joperezr marked this conversation as resolved.
Show resolved Hide resolved
/// </remarks>
public ref struct ValueMatchEnumerator
{
private readonly Regex _regex;
private readonly ReadOnlySpan<char> _input;
private ValueMatch _current;
private int _startAt;
private int _prevLen;

/// <summary>
/// Creates an instance of the <see cref="ValueMatchEnumerator"/> for the passed in <paramref name="regex"/> which iterates over <paramref name="input"/>.
/// </summary>
/// <param name="regex">The <see cref="Regex"/> to use for finding matches.</param>
/// <param name="input">The input span to iterate over.</param>
/// <param name="startAt">The position where the engine should start looking for matches from.</param>
internal ValueMatchEnumerator(Regex regex, ReadOnlySpan<char> input, int startAt)
{
_regex = regex;
_input = input;
_current = default;
_startAt = startAt;
_prevLen = -1;
}

/// <summary>
/// Provides an enumerator that iterates through the matches in the input span.
/// </summary>
/// <returns>A copy of this enumerator.</returns>
public readonly ValueMatchEnumerator GetEnumerator() => this;

/// <summary>
/// Advances the enumerator to the next match in the span.
/// </summary>
/// <returns>
/// <see langword="true"/> if the enumerator was successfully advanced to the next element; <see langword="false"/> if the enumerator cannot find additional matches.
/// </returns>
public bool MoveNext()
{
Match? match = _regex.RunSingleMatch(quick: false, _prevLen, _input, _startAt);
Debug.Assert(match != null, "Match shouldn't be null because we passed quick = false.");
if (match != RegularExpressions.Match.Empty)
{
_current = new ValueMatch(match.Index, match.Length);
_startAt = match._textpos;
_prevLen = match.Length;
return true;
}
return false;
}

/// <summary>
/// Gets the <see cref="ValueMatch"/> element at the current position of the enumerator.
/// </summary>
/// <exception cref="InvalidOperationException">Enumeration has either not started or has already finished.</exception>
public readonly ValueMatch Current => _current;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public bool IsMatch(string input)
/// <returns><see langword="true"/> if the regular expression finds a match; otherwise, <see langword="false"/>.</returns>
/// <exception cref="RegexMatchTimeoutException">A time-out ocurred.</exception>
public bool IsMatch(ReadOnlySpan<char> input) =>
RunSingleMatch(input, RightToLeft ? input.Length : 0) is null;
RunSingleMatch(quick: true, -1, input, RightToLeft ? input.Length : 0) is null;

/// <summary>
/// Searches the input string for one or more matches using the previous pattern and options,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ protected void InitializeReferences()
}

/// <summary>Internal worker which will scan the passed in span <paramref name="input"/> for a match. Used by public APIs.</summary>
internal Match? RunSingleMatch(ReadOnlySpan<char> input, int startat)
internal Match? RunSingleMatch(bool quick, int prevlen, ReadOnlySpan<char> input, int startat)
{
// startat parameter is always either 0 or input.Length since public API for IsMatch doesn't have an overload
// that takes in startat.
Expand All @@ -413,13 +413,45 @@ protected void InitializeReferences()
try
{
runner.InitializeTimeout(internalMatchTimeout);
runner.InitializeForScan(this, input, startat, quick: true);
runner.InitializeForScan(this, input, startat, quick);

// If previous match was empty or failed, advance by one before matching.
if (prevlen == 0)
{
if (RightToLeft)
{
if (runner.runtextstart == 0)
{
return RegularExpressions.Match.Empty;
}
runner.runtextpos--;
}
else
{
if (runner.runtextstart == input.Length)
{
return RegularExpressions.Match.Empty;
}
runner.runtextpos++;
}
}

runner.Scan(input);

// If runmatch is null it means that an override of Scan didn't implement it correctly, so we will
// let this null ref since there are lots of ways where you can end up in a erroneous state.
return runner.runmatch!.FoundMatch ? null : RegularExpressions.Match.Empty;
Match match = runner.runmatch!;
if (match!.FoundMatch)
{
if (quick)
{
return null;
}
match.Tidy(runner.runtextpos, 0);
return match;
}

return RegularExpressions.Match.Empty;
}
finally
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

namespace System.Text.RegularExpressions
{
/// <summary>
/// Represents the results from a single regular expression match.
/// </summary>
/// <remarks>
/// The <see cref="ValueMatch"/> type is immutable and has no public constructor. An instance of the <see cref="ValueMatch"/> struct is returned by the
/// <see cref="Regex.ValueMatchEnumerator.Current"/> method when iterating over the results from calling <see cref="Regex.EnumerateMatches(ReadOnlySpan{char})"/>.
/// </remarks>
public readonly ref struct ValueMatch
{
private readonly int _index;
private readonly int _length;

/// <summary>
/// Crates an instance of the <see cref="ValueMatch"/> type based on the passed in <paramref name="index"/> and <paramref name="length"/>.
/// </summary>
/// <param name="index">The position in the original span where the first character of the captured sliced span is found.</param>
/// <param name="length">The length of the captured sliced span.</param>
internal ValueMatch(int index, int length)
{
_index = index;
_length = length;
}

/// <summary>
/// Gets the position in the original span where the first character of the captured sliced span is found.
/// </summary>
public int Index => _index;

/// <summary>
/// Gets the length of the captured sliced span.
/// </summary>
public int Length => _length;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

namespace System.Text.RegularExpressions.Tests
{
public class RegexCountTests
public partial class RegexCountTests
{
[Theory]
[MemberData(nameof(Count_ReturnsExpectedCount_TestData))]
Expand Down
Loading