Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove Or, And, Not + related fixes #69839

Merged
merged 7 commits into from
May 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
<ItemGroup>
<Compile Include="System\Collections\HashtableExtensions.cs" />
<Compile Include="System\Collections\Generic\ValueListBuilder.Pop.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\DoublyLinkedList.cs" />
<Compile Include="System\Text\RegularExpressions\ValueMatch.cs" />
<Compile Include="System\Threading\StackHelper.cs" />
<Compile Include="System\Text\SegmentStringBuilder.cs" />
Expand Down Expand Up @@ -65,25 +64,23 @@
<Compile Include="System\Text\RegularExpressions\Symbolic\CharKind.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\CharSetSolver.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\DerivativeEffect.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\DgmlWriter.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\DfaMatchingState.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\DoublyLinkedList.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\ISolver.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\MintermClassifier.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\MintermGenerator.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\RegexNodeConverter.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SparseIntMap.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicMatch.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicNFA.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexBuilder.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexNode.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexKind.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexInfo.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexMatcher.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexRunnerFactory.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexSampler.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexSet.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\TransitionRegex.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\TransitionRegexKind.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexMatcher.Dgml.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexMatcher.Explore.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexMatcher.Sample.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\UInt64Solver.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\UnicodeCategoryConditions.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\UnicodeCategoryRanges.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,16 @@ public partial class Regex
{
/// <summary>Unwind the regex and save the resulting state graph in DGML</summary>
/// <param name="writer">Writer to which the DGML is written.</param>
/// <param name="nfa">True to create an NFA instead of a DFA.</param>
/// <param name="addDotStar">True to prepend .*? onto the pattern (outside of the implicit root capture).</param>
/// <param name="reverse">If true, then unwind the regex backwards (and <paramref name="addDotStar"/> is ignored).</param>
/// <param name="maxStates">The approximate maximum number of states to include; less than or equal to 0 for no maximum.</param>
/// <param name="maxLabelLength">Maximum length of labels in nodes; anything over that length is indicated with "..".</param>
[ExcludeFromCodeCoverage(Justification = "Debug only")]
internal void SaveDGML(TextWriter writer, bool nfa, bool addDotStar, bool reverse, int maxStates, int maxLabelLength)
internal void SaveDGML(TextWriter writer, int maxLabelLength)
{
if (factory is not SymbolicRegexRunnerFactory srmFactory)
{
throw new NotSupportedException();
}

srmFactory._matcher.SaveDGML(writer, nfa, addDotStar, reverse, maxStates, maxLabelLength);
srmFactory._matcher.SaveDGML(writer, maxLabelLength);
}

/// <summary>
Expand All @@ -44,17 +40,43 @@ internal static void GenerateUnicodeTables(string path)
/// </summary>
/// <param name="k">upper bound on the number of generated strings</param>
/// <param name="randomseed">random seed for the generator, 0 means no random seed</param>
/// <param name="negative">if true then generate inputs that do not match</param>
/// <returns>The generated strings, at most <paramref name="k"/> of them.</returns>
[ExcludeFromCodeCoverage(Justification = "Debug only")]
internal IEnumerable<string> GenerateRandomMembers(int k, int randomseed, bool negative)
internal IEnumerable<string> SampleMatches(int k, int randomseed)
{
if (factory is not SymbolicRegexRunnerFactory srmFactory)
{
throw new NotSupportedException();
}

return srmFactory._matcher.GenerateRandomMembers(k, randomseed, negative);
return srmFactory._matcher.SampleMatches(k, randomseed);
}

/// <summary>
/// Explore transitions of the DFA and/or NFA exhaustively. DFA exploration, if requested, is done only up to the
/// DFA state limit. NFA exploration, if requested, continues from the states unexplored by the DFA exploration,
/// or from the initial states if DFA exploration was not requested. NFA exploration will always finish.
/// </summary>
/// <remarks>
/// This may result in a different automaton being explored than matching would produce, since if the limit for
/// the number of DFA states is reached then the order in which states and transitions are explored is significant.
olsaarik marked this conversation as resolved.
Show resolved Hide resolved
/// During matching that order is driven by the input, while this function may use any order (currently it is
/// breadth-first).
/// </remarks>
/// <param name="includeDotStarred">whether to explore the .*? prefixed version of the pattern</param>
/// <param name="includeReverse">whether to explore the reversed pattern</param>
/// <param name="includeOriginal">whether to explore the original pattern</param>
/// <param name="exploreDfa">whether to explore DFA transitions</param>
/// <param name="exploreNfa">whether to explore NFA transitions</param>
[ExcludeFromCodeCoverage(Justification = "Debug only")]
internal void Explore(bool includeDotStarred, bool includeReverse, bool includeOriginal, bool exploreDfa, bool exploreNfa)
{
    // Only a SymbolicRegexRunnerFactory carries the matcher that can be explored;
    // forward all flags to it unchanged and finish.
    if (factory is SymbolicRegexRunnerFactory symbolicFactory)
    {
        symbolicFactory._matcher.Explore(includeDotStarred, includeReverse, includeOriginal, exploreDfa, exploreNfa);
        return;
    }

    // Any other factory (including none) cannot be explored.
    throw new NotSupportedException();
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ public partial class Regex : ISerializable
// so this is a convenient place to include them rather than needing a debug-only illink file.
[DynamicDependency(nameof(SaveDGML))]
[DynamicDependency(nameof(GenerateUnicodeTables))]
[DynamicDependency(nameof(GenerateRandomMembers))]
[DynamicDependency(nameof(SampleMatches))]
[DynamicDependency(nameof(Explore))]
#endif
protected Regex()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,11 @@ internal DfaMatchingState(SymbolicRegexNode<TSet> node, uint prevCharKind)
internal bool IsDeadend => Node.IsNothing;

/// <summary>The node must be nullable here</summary>
internal int FixedLength
internal int FixedLength(uint nextCharKind)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's great that you were able to fix the fixed-length markers. Do we know if this was contributing to some of the perf slowdowns that had been measured?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likely, but I don't know for sure yet. I'll measure against current main after I get this merged (to unblock Margus).

{
get
{
if (Node._kind == SymbolicRegexNodeKind.FixedLengthMarker)
{
return Node._lower;
}

if (Node._kind == SymbolicRegexNodeKind.Or)
{
Debug.Assert(Node._alts is not null);
return Node._alts._maximumLength;
}

return -1;
}
Debug.Assert(nextCharKind is 0 or CharKind.BeginningEnd or CharKind.Newline or CharKind.WordLetter or CharKind.NewLineS);
uint context = CharKind.Context(PrevCharKind, nextCharKind);
return Node.ResolveFixedLength(context);
}

/// <summary>If true then the state is a dead-end, rejects all inputs.</summary>
Expand Down
Loading