Skip to content

Commit

Permalink
Improve incremental handling in regex generator (#83868)
Browse files Browse the repository at this point in the history
* Improve incrementalism:
- Don't calculate the tree or analysis inside the semantic info.
- Dont' store methodDeclartionSyntax and store a comparable across runs location instead.

* Add comments, clean up, and simplify parser

---------

Co-authored-by: Chris Sienkiewicz <[email protected]>
  • Loading branch information
stephentoub and chsienki authored Mar 24, 2023
1 parent 0111ffd commit 8a7a91c
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 74 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Immutable;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using System.Threading;
Expand All @@ -19,17 +20,16 @@ public partial class RegexGenerator
private const string GeneratedRegexAttributeName = "System.Text.RegularExpressions.GeneratedRegexAttribute";

// Returns null if nothing to do, Diagnostic if there's an error to report, or RegexType if the type was analyzed successfully.
private static object? GetSemanticTargetForGeneration(
private static object? GetRegexMethodDataOrFailureDiagnostic(
GeneratorAttributeSyntaxContext context, CancellationToken cancellationToken)
{
var methodSyntax = (MethodDeclarationSyntax)context.TargetNode;
SemanticModel sm = context.SemanticModel;

Compilation compilation = sm.Compilation;
INamedTypeSymbol? regexSymbol = compilation.GetBestTypeByMetadataName(RegexName);
INamedTypeSymbol? generatedRegexAttributeSymbol = compilation.GetBestTypeByMetadataName(GeneratedRegexAttributeName);

if (regexSymbol is null || generatedRegexAttributeSymbol is null)
if (regexSymbol is null)
{
// Required types aren't available
return null;
Expand All @@ -47,71 +47,51 @@ public partial class RegexGenerator
return null;
}

ImmutableArray<AttributeData>? boundAttributes = regexMethodSymbol.GetAttributes();
if (boundAttributes is null || boundAttributes.Value.Length == 0)
ImmutableArray<AttributeData> boundAttributes = context.Attributes;
if (boundAttributes.Length != 1)
{
return null;
return Diagnostic.Create(DiagnosticDescriptors.MultipleGeneratedRegexAttributes, methodSyntax.GetLocation());
}
AttributeData generatedRegexAttr = boundAttributes[0];

bool attributeFound = false;
string? pattern = null;
if (generatedRegexAttr.ConstructorArguments.Any(ca => ca.Kind == TypedConstantKind.Error))
{
return Diagnostic.Create(DiagnosticDescriptors.InvalidGeneratedRegexAttribute, methodSyntax.GetLocation());
}

ImmutableArray<TypedConstant> items = generatedRegexAttr.ConstructorArguments;
if (items.Length is 0 or > 4)
{
return Diagnostic.Create(DiagnosticDescriptors.InvalidGeneratedRegexAttribute, methodSyntax.GetLocation());
}

string? pattern = items[0].Value as string;
int? options = null;
int? matchTimeout = null;
string? cultureName = string.Empty;
foreach (AttributeData attributeData in boundAttributes)
if (items.Length >= 2)
{
if (!SymbolEqualityComparer.Default.Equals(attributeData.AttributeClass, generatedRegexAttributeSymbol))
options = items[1].Value as int?;
if (items.Length == 4)
{
continue;
matchTimeout = items[2].Value as int?;
cultureName = items[3].Value as string;
}

if (attributeData.ConstructorArguments.Any(ca => ca.Kind == TypedConstantKind.Error))
{
return Diagnostic.Create(DiagnosticDescriptors.InvalidGeneratedRegexAttribute, methodSyntax.GetLocation());
}

if (pattern is not null)
{
return Diagnostic.Create(DiagnosticDescriptors.MultipleGeneratedRegexAttributes, methodSyntax.GetLocation());
}

ImmutableArray<TypedConstant> items = attributeData.ConstructorArguments;
if (items.Length == 0 || items.Length > 4)
{
return Diagnostic.Create(DiagnosticDescriptors.InvalidGeneratedRegexAttribute, methodSyntax.GetLocation());
}

attributeFound = true;
pattern = items[0].Value as string;
if (items.Length >= 2)
// If there are 3 parameters, we need to check if the third argument is
// int matchTimeoutMilliseconds, or string cultureName.
else if (items.Length == 3)
{
options = items[1].Value as int?;
if (items.Length == 4)
if (items[2].Type?.SpecialType == SpecialType.System_Int32)
{
matchTimeout = items[2].Value as int?;
cultureName = items[3].Value as string;
}
// If there are 3 parameters, we need to check if the third argument is
// int matchTimeoutMilliseconds, or string cultureName.
else if (items.Length == 3)
else
{
if (items[2].Type?.SpecialType == SpecialType.System_Int32)
{
matchTimeout = items[2].Value as int?;
}
else
{
cultureName = items[2].Value as string;
}
cultureName = items[2].Value as string;
}
}
}

if (!attributeFound)
{
return null;
}

if (pattern is null || cultureName is null)
{
return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "(null)");
Expand All @@ -128,7 +108,7 @@ public partial class RegexGenerator

RegexOptions regexOptions = options is not null ? (RegexOptions)options : RegexOptions.None;

// If RegexOptions.IgnoreCase was specified or the inline ignore case option `(?i)` is present in the pattern, then we will (in priority order):
// If RegexOptions.IgnoreCase was specified or the inline ignore case option `(?i)` is present in the pattern, then we will (in priority order):
// - If a culture name was passed in:
// - If RegexOptions.CultureInvariant was also passed in, then we emit a diagnostic due to the explicit conflict.
// - We try to initialize a culture using the passed in culture name to be used for case-sensitive comparisons. If
Expand Down Expand Up @@ -186,19 +166,6 @@ public partial class RegexGenerator
return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "matchTimeout");
}

// Parse the input pattern
RegexTree regexTree;
AnalysisResults analysis;
try
{
regexTree = RegexParser.Parse(pattern, regexOptions | RegexOptions.Compiled, culture); // make sure Compiled is included to get all optimizations applied to it
analysis = RegexTreeAnalyzer.Analyze(regexTree);
}
catch (Exception e)
{
return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), e.Message);
}

// Determine the namespace the class is declared in, if any
string? ns = regexMethodSymbol.ContainingType?.ContainingNamespace?.ToDisplayString(
SymbolDisplayFormat.FullyQualifiedFormat.WithGlobalNamespaceStyle(SymbolDisplayGlobalNamespaceStyle.Omitted));
Expand All @@ -208,16 +175,15 @@ public partial class RegexGenerator
ns ?? string.Empty,
$"{typeDec.Identifier}{typeDec.TypeParameterList}");

var regexMethod = new RegexMethod(
var result = new RegexPatternAndSyntax(
regexType,
methodSyntax,
GetComparableLocation(methodSyntax),
regexMethodSymbol.Name,
methodSyntax.Modifiers.ToString(),
pattern,
regexOptions,
matchTimeout,
regexTree,
analysis);
culture);

RegexType current = regexType;
var parent = typeDec.Parent as TypeDeclarationSyntax;
Expand All @@ -233,18 +199,30 @@ public partial class RegexGenerator
parent = parent.Parent as TypeDeclarationSyntax;
}

return regexMethod;
return result;

static bool IsAllowedKind(SyntaxKind kind) =>
kind == SyntaxKind.ClassDeclaration ||
kind == SyntaxKind.StructDeclaration ||
kind == SyntaxKind.RecordDeclaration ||
kind == SyntaxKind.RecordStructDeclaration ||
kind == SyntaxKind.InterfaceDeclaration;


// Get a Location object that doesn't store a reference to the compilation.
// That allows it to compare equally across compilations.
static Location GetComparableLocation(MethodDeclarationSyntax method)
{
var location = method.GetLocation();
return Location.Create(location.SourceTree?.FilePath ?? string.Empty, location.SourceSpan, location.GetLineSpan().Span);
}
}

/// <summary>A regex method.</summary>
internal sealed record RegexMethod(RegexType DeclaringType, MethodDeclarationSyntax MethodSyntax, string MethodName, string Modifiers, string Pattern, RegexOptions Options, int? MatchTimeout, RegexTree Tree, AnalysisResults Analysis)
/// <summary>Data about a regex directly from the GeneratedRegex attribute.</summary>
internal sealed record RegexPatternAndSyntax(RegexType DeclaringType, Location DiagnosticLocation, string MethodName, string Modifiers, string Pattern, RegexOptions Options, int? MatchTimeout, CultureInfo Culture);

/// <summary>Data about a regex, including a fully parsed RegexTree and subsequent analysis.</summary>
internal sealed record RegexMethod(RegexType DeclaringType, Location DiagnosticLocation, string MethodName, string Modifiers, string Pattern, RegexOptions Options, int? MatchTimeout, RegexTree Tree, AnalysisResults Analysis)
{
public string? GeneratedName { get; set; }
public bool IsDuplicate { get; set; }
Expand Down
38 changes: 35 additions & 3 deletions src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,43 @@ x.Options is CSharpCompilationOptions options ?
context.SyntaxProvider

// Find all MethodDeclarationSyntax nodes attributed with GeneratedRegex and gather the required information.
// The predicate will be run once for every attributed node in the same file that's being modified.
// The transform will be run once for every attributed node in the compilation.
// Thus, both should do the minimal amount of work required and get out. This should also have extracted
// everything from the target necessary to do all subsequent analysis and should return an object that's
// meaningfully comparable and that doesn't reference anything from the compilation: we want to ensure
// that any successful cached results are idempotent for the input such that they don't trigger downstream work
// if there are no changes.
.ForAttributeWithMetadataName(
GeneratedRegexAttributeName,
(node, _) => node is MethodDeclarationSyntax,
GetSemanticTargetForGeneration)
GetRegexMethodDataOrFailureDiagnostic)

// Filter out any parsing errors that resulted in null objects being returned.
.Where(static m => m is not null)

// Incorporate the compilation data, as it impacts code generation.
// The input here will either be a Diagnostic (in the case of something erroneous detected in GetRegexMethodDataOrFailureDiagnostic)
// or it will be a RegexPatternAndSyntax containing all of the successfully parsed data from the attribute/method.
.Select((methodOrDiagnostic, _) =>
{
if (methodOrDiagnostic is RegexPatternAndSyntax method)
{
try
{
RegexTree regexTree = RegexParser.Parse(method.Pattern, method.Options | RegexOptions.Compiled, method.Culture); // make sure Compiled is included to get all optimizations applied to it
AnalysisResults analysis = RegexTreeAnalyzer.Analyze(regexTree);
return new RegexMethod(method.DeclaringType, method.DiagnosticLocation, method.MethodName, method.Modifiers, method.Pattern, method.Options, method.MatchTimeout, regexTree, analysis);
}
catch (Exception e)
{
return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, method.DiagnosticLocation, e.Message);
}
}

return methodOrDiagnostic;
})

// Now incorporate the compilation data, as it impacts code generation.
.Combine(compilationDataProvider)

// Generate the RunnerFactory for each regex, if possible. This is where the bulk of the implementation occurs.
Expand All @@ -80,7 +110,7 @@ x.Options is CSharpCompilationOptions options ?
// We'll still output a limited implementation that just caches a new Regex(...).
if (!SupportsCodeGeneration(regexMethod, methodOrDiagnosticAndCompilationData.Right.LanguageVersion, out string? reason))
{
return (regexMethod, reason, Diagnostic.Create(DiagnosticDescriptors.LimitedSourceGeneration, regexMethod.MethodSyntax.GetLocation()), methodOrDiagnosticAndCompilationData.Right);
return (regexMethod, reason, Diagnostic.Create(DiagnosticDescriptors.LimitedSourceGeneration, regexMethod.DiagnosticLocation), methodOrDiagnosticAndCompilationData.Right);
}

// Generate the core logic for the regex.
Expand All @@ -93,6 +123,8 @@ x.Options is CSharpCompilationOptions options ?
writer.Indent -= 2;
return (regexMethod, sw.ToString(), requiredHelpers, methodOrDiagnosticAndCompilationData.Right);
})

// Combine all of the generated text outputs into a single batch. We then generate a single source output from that batch.
.Collect();

// When there something to output, take all the generated strings and concatenate them to output,
Expand Down

0 comments on commit 8a7a91c

Please sign in to comment.