Skip to content

Commit

Permalink
Update FormatLines in XmlDocStatement.cs to handle line breaks (#5214)
Browse files Browse the repository at this point in the history
Fixes #4377

---------

Co-authored-by: Dapeng Zhang <[email protected]>
  • Loading branch information
nisha-bhatia and ArcturusZhang authored Dec 12, 2024
1 parent 68f0609 commit 53f6979
Show file tree
Hide file tree
Showing 10 changed files with 525 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ internal override void Write(CodeWriter writer)
{
writer.AppendRaw("$\"");
var argumentCount = 0;
foreach ((var span, bool isLiteral) in StringExtensions.GetPathParts(Format))
foreach ((var span, bool isLiteral) in StringExtensions.GetFormattableStringFormatParts(Format))
{
if (isLiteral)
{
Expand All @@ -51,7 +51,7 @@ internal override void Write(CodeWriter writer)
private static void Validate(string format, IReadOnlyList<ValueExpression> args)
{
var count = 0;
foreach (var (_, isLiteral) in StringExtensions.GetPathParts(format))
foreach (var (_, isLiteral) in StringExtensions.GetFormattableStringFormatParts(format))
{
if (!isLiteral)
count++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,16 +83,18 @@ public static string ToCleanName(this string name, bool isCamelCase = true)
[return: NotNullIfNotNull(nameof(name))]
public static string ToVariableName(this string name) => ToCleanName(name, isCamelCase: false);

public static GetPathPartsEnumerator GetPathParts(string? path) => new GetPathPartsEnumerator(path);
public static GetPathPartsEnumerator GetFormattableStringFormatParts(string? format) => new GetPathPartsEnumerator(format);

public static GetPathPartsEnumerator GetFormattableStringFormatParts(ReadOnlySpan<char> format) => new GetPathPartsEnumerator(format);

public ref struct GetPathPartsEnumerator
{
private ReadOnlySpan<char> _path;
public Part Current { get; private set; }

public GetPathPartsEnumerator(ReadOnlySpan<char> path)
public GetPathPartsEnumerator(ReadOnlySpan<char> format)
{
_path = path;
_path = format;
Current = default;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,29 @@ public XmlDocStatement(string startTag, string endTag, IEnumerable<FormattableSt
{
StartTag = startTag;
EndTag = endTag;
_lines = EscapeLines(lines);
_lines = NormalizeLines(lines);
InnerStatements = innerStatements;
}

private List<FormattableString> EscapeLines(IEnumerable<FormattableString> lines)
private List<FormattableString> NormalizeLines(IEnumerable<FormattableString> lines)
{
List<FormattableString> escapedLines = new List<FormattableString>();
List<FormattableString> result = new List<FormattableString>();

// break lines if they have line breaks
foreach (var line in lines)
{
escapedLines.Add(FormattableStringFactory.Create(EscapeLine(line.Format), EscapeArguments(line.GetArguments())));
var breakLines = FormattableStringHelpers.BreakLines(line);
result.AddRange(breakLines);
}
return escapedLines;

// escape lines if they have invalid characters
for (int i = 0; i < result.Count; i++)
{
var line = result[i];
result[i] = FormattableStringFactory.Create(EscapeLine(line.Format), EscapeArguments(line.GetArguments()));
}

return result;
}

private static object?[] EscapeArguments(object?[] objects)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
using Microsoft.Generator.CSharp.Providers;

namespace Microsoft.Generator.CSharp
Expand Down Expand Up @@ -97,5 +98,148 @@ public static string ReplaceLast(this string text, string oldValue, string newVa
var position = text.LastIndexOf(oldValue, StringComparison.Ordinal);
return position < 0 ? text : text.Substring(0, position) + newValue + text.Substring(position + oldValue.Length);
}

/// <summary>
/// Splits <paramref name="input"/> into one <see cref="FormattableString"/> per line, breaking on the
/// line breaks found while walking its format string and its arguments.
/// </summary>
/// <param name="input">The formattable string to split.</param>
/// <returns>
/// The resulting lines in order. An input whose format is empty is returned unchanged as the only element.
/// </returns>
internal static IReadOnlyList<FormattableString> BreakLines(FormattableString input)
{
    // An empty format is still a line of its own - hand it back untouched instead of dropping it.
    if (input.Format.Length == 0)
    {
        return [input];
    }

    var lines = new List<FormattableString>();
    var pendingFormat = new StringBuilder();
    var pendingArgs = new List<object?>();

    // Completed lines are pushed into `lines`; whatever follows the last line break stays in the pending buffers.
    bool endsWithLineBreak = BreakLinesCore(input, pendingFormat, pendingArgs, lines);

    // Nothing is left over and the format did not end with a line break: every line has already been emitted.
    if (pendingFormat.Length == 0 && !endsWithLineBreak)
    {
        return lines;
    }

    // Emit the trailing content, or the empty line that follows a trailing line break.
    lines.Add(FormattableStringFactory.Create(pendingFormat.ToString(), pendingArgs.ToArray()));
    return lines;
}

/// <summary>
/// Walks the parts of <paramref name="input"/>'s format string, accumulating the current line into
/// <paramref name="formatBuilder"/> and <paramref name="args"/>, and moving every completed line into
/// <paramref name="result"/> whenever a line break (<c>\r\n</c> or <c>\n</c>) is encountered.
/// String and <see cref="FormattableString"/> arguments without a format specifier are scanned for line breaks as well.
/// </summary>
/// <param name="input">The formattable string to process; its format may be empty when reached through recursion.</param>
/// <param name="formatBuilder">Format of the line currently being built; shared across recursive calls.</param>
/// <param name="args">Arguments of the line currently being built; shared across recursive calls.</param>
/// <param name="result">Receives each completed line.</param>
/// <returns><c>true</c> when the format of <paramref name="input"/> ends with <c>\n</c>; otherwise <c>false</c>.</returns>
private static bool BreakLinesCore(FormattableString input, StringBuilder formatBuilder, List<object?> args, List<FormattableString> result)
{
    // Largest scratch buffer we are willing to put on the stack. This method recurses into nested
    // FormattableString arguments, so an unbounded stackalloc sized by the format length could exhaust the stack.
    const int maxStackAllocatedRanges = 256;

    ReadOnlySpan<char> formatSpan = input.Format.AsSpan();

    // stackalloc cannot be used in a loop, so the buffer is created once here.
    // For a format string with length n, the worst case that produces the most segments is when all its content is
    // the char to split. For instance, when the format string is all \n, it produces n+1 segments
    // (because empty entries are not omitted).
    int maxSegments = formatSpan.Length + 1;
    Span<Range> splitIndices = maxSegments <= maxStackAllocatedRanges
        ? stackalloc Range[maxSegments]
        : new Range[maxSegments];

    foreach ((ReadOnlySpan<char> span, bool isLiteral, int index) in StringExtensions.GetFormattableStringFormatParts(formatSpan))
    {
        if (isLiteral)
        {
            // literal text goes straight into the format of the current line, one segment per line
            var numSplits = span.SplitAny(splitIndices, ["\r\n", "\n"]);
            for (int i = 0; i < numSplits; i++)
            {
                // the literals could contain { and }, but they are unescaped.
                // Since we are putting them back into a format, we need to escape them again.
                AppendEscapedLiteral(formatBuilder, span[splitIndices[i]]);

                // every segment except the last one is terminated by a line break
                if (i < numSplits - 1)
                {
                    FlushLine(formatBuilder, args, result);
                }
            }
            continue;
        }

        // not a literal, so this part is an argument placeholder
        var arg = input.GetArgument(index);
        // we only break lines in the arguments if the argument is a string or FormattableString and it does not have
        // a format specifier (indicated by : in span). We do nothing if the argument has a format specifier because
        // we do not really know in which form to break them; such arguments are passed through untouched.
        var indexOfFormatSpecifier = span.IndexOf(':');
        switch (arg)
        {
            case string str when indexOfFormatSpecifier < 0:
                BreakLinesCoreForString(str.AsSpan(), formatBuilder, args, result);
                break;
            case FormattableString fs when indexOfFormatSpecifier < 0:
                // the nested return value only describes the nested format, so it is intentionally ignored
                BreakLinesCore(fs, formatBuilder, args, result);
                break;
            default:
                // anything else cannot be parsed: keep it as an argument of the current line and
                // emit a placeholder renumbered to its position in that line's argument list
                formatBuilder.Append('{');
                formatBuilder.Append(args.Count);
                if (indexOfFormatSpecifier >= 0)
                {
                    formatBuilder.Append(span[indexOfFormatSpecifier..]);
                }
                formatBuilder.Append('}');
                args.Add(arg);
                break;
        }
    }

    // A nested FormattableString argument may have an empty format; indexing [^1] on it would throw.
    return !formatSpan.IsEmpty && formatSpan[^1] == '\n';

    // Moves the line accumulated so far into the result and resets the accumulators for the next line.
    static void FlushLine(StringBuilder formatBuilder, List<object?> args, List<FormattableString> result)
    {
        result.Add(FormattableStringFactory.Create(formatBuilder.ToString(), args.ToArray()));
        formatBuilder.Clear();
        args.Clear();
    }

    // Appends literal text to a composite format, doubling every '{' and '}' so that it is read back as literal text.
    static void AppendEscapedLiteral(StringBuilder formatBuilder, ReadOnlySpan<char> literal)
    {
        while (!literal.IsEmpty)
        {
            int braceIndex = literal.IndexOfAny('{', '}');
            if (braceIndex < 0)
            {
                formatBuilder.Append(literal);
                return;
            }

            formatBuilder.Append(literal[..braceIndex]);
            formatBuilder.Append(literal[braceIndex], 2);
            literal = literal[(braceIndex + 1)..];
        }
    }

    // Splits a plain string argument on line breaks; every segment becomes its own "{n}" placeholder
    // so that the text is carried as an argument and never needs brace escaping.
    static void BreakLinesCoreForString(ReadOnlySpan<char> span, StringBuilder formatBuilder, List<object?> args, List<FormattableString> result)
    {
        int start = 0, end = 0;
        bool isLast = false;
        // go into the loop when there are characters left
        while (end < span.Length)
        {
            // we only search for `\n`: `\r\n` contains `\n`, so searching for both would scan the same text twice
            var indexOfLF = span[start..].IndexOf('\n');
            // check if the line already ends.
            if (indexOfLF < 0)
            {
                end = span.Length;
                isLast = true;
            }
            else
            {
                end = start + indexOfLF;
            }
            // omit \r if there is one before the \n to include the case that line breaks are using \r\n
            // NOTE: this check also runs for the final segment, so a '\r' that ends the string is trimmed too.
            int partEnd = end;
            if (end > 0 && span[end - 1] == '\r')
            {
                partEnd--;
            }

            formatBuilder.Append('{')
                .Append(args.Count)
                .Append('}');
            args.Add(span[start..partEnd].ToString());
            start = end + 1; // goes to the next char after the \n we found

            if (!isLast)
            {
                FlushLine(formatBuilder, args, result);
            }
        }
    }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ public CodeWriter Append(FormattableString formattableString)
const string declarationFormatString = ":D"; // :D :)
const string identifierFormatString = ":I";
const string crefFormatString = ":C"; // wraps content into "see cref" tag, available only in xmlDoc
foreach ((var span, bool isLiteral, int index) in StringExtensions.GetPathParts(formattableString.Format))
foreach ((var span, bool isLiteral, int index) in StringExtensions.GetFormattableStringFormatParts(formattableString.Format))
{
if (isLiteral)
{
Expand Down
Loading

0 comments on commit 53f6979

Please sign in to comment.