Skip to content

Commit

Permalink
Use copied implementation to tokenize commandLine for Roslyn.
Browse files Browse the repository at this point in the history
  • Loading branch information
Corniel committed Mar 3, 2024
1 parent 175892f commit 3268575
Showing 1 changed file with 133 additions and 3 deletions.
136 changes: 133 additions & 3 deletions src/Buildalyzer/Compiler/RoslynCommandLineParser.cs
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
#nullable enable

using Microsoft.CodeAnalysis;

namespace Buildalyzer;

internal static class RoslynCommandLineParser
{
/// <summary>
/// Tokenizes <paramref name="commandLine"/> with <c>Tokenize</c> and hands the resulting
/// tokens, together with <paramref name="execs"/>, to <c>Split</c>.
/// </summary>
/// <param name="commandLine">
/// The raw command line; <see langword="null"/> is treated as an empty string.
/// </param>
/// <param name="execs">
/// Values forwarded unchanged to <c>Split</c>.
/// NOTE(review): presumably the executable names that mark where the arguments start; confirm against <c>Split</c>.
/// </param>
/// <returns>The result of <c>Split</c>, which may be <see langword="null"/>.</returns>
[Pure]
public static string[]? SplitCommandLineIntoArguments(string? commandLine, params string[] execs)
    => Split(Tokenize(commandLine ?? string.Empty).ToArray(), execs);

[Pure]
private static string[]? Split(string[] args, string[] execs)
Expand All @@ -25,4 +23,136 @@ internal static class RoslynCommandLineParser
}
return null;
}

/// <summary>
/// Splits a command line by the same rules Main would use to receive its arguments, except that
/// the original state of backslashes and quotes is preserved. For example, in normal Windows
/// command line parsing the following command lines would produce equivalent Main arguments:
///
/// - /r:a,b
/// - /r:"a,b"
///
/// This method will differ as the latter will have the quotes preserved. The only case where
/// quotes are removed is when the entire argument is surrounded by quotes without any inner
/// quotes.
/// </summary>
/// <remarks>
/// This code originates at https://sourceroslyn.io/#Microsoft.CodeAnalysis/InternalUtilities/CommandLineUtilities.cs,
/// and is licensed to the .NET Foundation under one or more agreements.
///
/// Rules for command line parsing, according to MSDN:
///
/// Arguments are delimited by white space, which is either a space or a tab.
/// (This implementation uses <see cref="char.IsWhiteSpace(char)"/> to detect delimiters.)
///
/// A string surrounded by double quotation marks ("string") is interpreted
/// as a single argument, regardless of white space contained within.
/// A quoted string can be embedded in an argument.
///
/// A double quotation mark preceded by a backslash (\") is interpreted as a
/// literal double quotation mark character (").
///
/// Backslashes are interpreted literally, unless they immediately precede a
/// double quotation mark.
///
/// If an even number of backslashes is followed by a double quotation mark,
/// one backslash is placed in the argv array for every pair of backslashes,
/// and the double quotation mark is interpreted as a string delimiter.
///
/// If an odd number of backslashes is followed by a double quotation mark,
/// one backslash is placed in the argv array for every pair of backslashes,
/// and the double quotation mark is "escaped" by the remaining backslash,
/// causing a literal double quotation mark (") to be placed in argv.
///
/// An argument that starts with '#' is treated as the start of a comment: it and everything
/// after it are ignored.
/// </remarks>
/// <param name="commandLine">The raw command line to split.</param>
/// <returns>
/// The non-empty arguments in order of appearance. The sequence is produced lazily.
/// </returns>
/// <exception cref="FormatException">
/// Thrown while enumerating the result when an argument contains a character in the range
/// U+0001 to U+001F (this includes a tab or line break inside a quoted section) or a '|'.
/// </exception>
public static IEnumerable<string> Tokenize(string commandLine)
{
    var i = 0;

    // Explicit type: the CI build reports RCS1012 for 'var' on this declaration.
    StringBuilder builder = new();

    while (i < commandLine.Length)
    {
        // Skip the white space that separates arguments.
        while (i < commandLine.Length && char.IsWhiteSpace(commandLine[i]))
        {
            i++;
        }

        if (i == commandLine.Length)
        {
            break;
        }

        // Remove hash comments
        if (commandLine[i] == '#')
        {
            break;
        }

        // Number of unescaped quotes seen in the current argument; an odd count means
        // we are inside a quoted section, where white space does not end the argument.
        var quoteCount = 0;
        builder.Length = 0;
        while (i < commandLine.Length && (!char.IsWhiteSpace(commandLine[i]) || (quoteCount % 2 != 0)))
        {
            var current = commandLine[i];
            switch (current)
            {
                case '\\':
                    var slashCount = 0;
                    do
                    {
                        builder.Append(commandLine[i]);
                        i++;
                        slashCount++;
                    }
                    while (i < commandLine.Length && commandLine[i] == '\\');

                    // Slashes not followed by a quote character can be ignored for now
                    if (i >= commandLine.Length || commandLine[i] != '"')
                    {
                        break;
                    }

                    // If there is an odd number of slashes then it is escaping the quote
                    // otherwise it is just a quote.
                    if (slashCount % 2 == 0)
                    {
                        quoteCount++;
                    }

                    builder.Append('"');
                    i++;
                    break;

                case '"':
                    builder.Append(current);
                    quoteCount++;
                    i++;
                    break;

                default:
                    if ((current >= 0x1 && current <= 0x1f) || current == '|')
                    {
                        throw new FormatException($"Illegal character '{current}' at pos {i + 1}.");
                    }
                    else
                    {
                        builder.Append(current);
                    }
                    i++;
                    break;
            }
        }

        // If the quote string is surrounded by quotes with no interior quotes then
        // remove the quotes here.
        // NOTE(review): an escaped trailing quote does not increase quoteCount, so an argument
        // such as "a"\" also passes this check and loses its escaped quote. Behavior is left
        // as-is to stay aligned with the implementation this was copied from.
        if (quoteCount == 2 && builder[0] == '"' && builder[builder.Length - 1] == '"')
        {
            builder.Remove(0, length: 1);
            builder.Remove(builder.Length - 1, length: 1);
        }

        if (builder.Length > 0)
        {
            yield return builder.ToString();
        }
    }
}
}

0 comments on commit 3268575

Please sign in to comment.