Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

expander optimization #11069

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
167 changes: 115 additions & 52 deletions src/Build/Evaluation/Expander.cs
Original file line number Diff line number Diff line change
Expand Up @@ -273,9 +273,6 @@ private void FlushFirstValueIfNeeded()
/// Enabled by ExpanderOptions.Truncate.
/// </summary>
private const int ItemLimitPerExpansion = 3;
private static readonly char[] s_singleQuoteChar = { '\'' };
private static readonly char[] s_backtickChar = { '`' };
private static readonly char[] s_doubleQuoteChar = { '"' };

/// <summary>
/// Those characters which indicate that an expression may contain expandable
Expand Down Expand Up @@ -775,47 +772,95 @@ private static int ScanForClosingQuote(char quoteChar, string expression, int in
}

/// <summary>
/// Add the argument in the StringBuilder to the arguments list, handling nulls
/// Add the argument represented by the slices to the arguments list, handling nulls
/// appropriately.
/// </summary>
private static void AddArgument(List<string> arguments, SpanBasedStringBuilder argumentBuilder)
private static void AddArgumentFromSlices(List<string> arguments, List<Tuple<int, int>> slices, string arg)
{
// we reached the end of an argument, add the builder's final result
// to our arguments.
argumentBuilder.Trim();
string argValue = argumentBuilder.ToString();
// This shouldn't happen
SimaTian marked this conversation as resolved.
Show resolved Hide resolved
int firstSlice = 0;
int lastSlice = slices.Count - 1;
if (lastSlice == -1)
{
arguments.Add("");
return;
}
// from end
int lastSliceIdx = slices[slices.Count - 1].Item2;
// from start
int firstSliceIdx = slices[0].Item1;

// We support passing of null through the argument constant value null
if (String.Equals("null", argValue, StringComparison.OrdinalIgnoreCase))
// Trim from the start
while (firstSlice <= lastSlice && Char.IsWhiteSpace(arg, firstSliceIdx))
{
arguments.Add(null);
firstSliceIdx++;
if (firstSliceIdx > slices[firstSlice].Item2 && ++firstSlice < lastSlice)
{
firstSliceIdx = slices[firstSlice].Item1;
}
}
else

// Trim from the end.
// Bit of extra logic to avoid trimming whitespace-only string one time too many.
SimaTian marked this conversation as resolved.
Show resolved Hide resolved
while (((firstSlice < lastSlice) || (firstSlice == lastSlice && firstSliceIdx < lastSliceIdx)) && Char.IsWhiteSpace(arg, lastSliceIdx - 1))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like `firstSlice <= lastSlice && Char.IsWhiteSpace(arg, firstSliceIdx)` can be moved to a separate method with a descriptive name and reused in the while loop here and on line 794.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This check compares against lastSliceIdx and also guards against duplicate whitespace removal (the second part after the ||).
It was similar-ish before, then I ran into the "all whitespace" edge case and crashed.
That is what the firstSlice == lastSlice && firstSliceIdx < lastSliceIdx is for.
So the function would have to be something like
IsEdgeWhitespace(arg, firstSlice, lastSlice, idxToCheck) and then some extra logic for this case.
Is that more readable? I would like to say no, but that could be just my laziness/lack of feeling for cases such as this.

{
if (argValue.Length > 0)
lastSliceIdx--;
if (slices[lastSlice].Item1 > lastSliceIdx && firstSlice < --lastSlice)
{
if (argValue[0] == '\'' && argValue[argValue.Length - 1] == '\'')
{
arguments.Add(argValue.Trim(s_singleQuoteChar));
}
else if (argValue[0] == '`' && argValue[argValue.Length - 1] == '`')
{
arguments.Add(argValue.Trim(s_backtickChar));
}
else if (argValue[0] == '"' && argValue[argValue.Length - 1] == '"')
{
arguments.Add(argValue.Trim(s_doubleQuoteChar));
}
else
{
arguments.Add(argValue);
}
lastSliceIdx = slices[lastSlice].Item2;
}
else
}

if (firstSliceIdx == lastSliceIdx)
{
arguments.Add("");
return;
}

bool removedQuotes = false;

if ((arg[firstSliceIdx] == '\'' && arg[lastSliceIdx - 1] == '\'') ||
(arg[firstSliceIdx] == '`' && arg[lastSliceIdx - 1] == '`') ||
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why did you remove the static fields for these repetitive chars?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comparison didn't use them (I'm not 100% sure why, but I kept it that way). Since I'm working with indices, I no longer needed them for the .Trim() calls, so they became unused and ./build.cmd started complaining.

(arg[firstSliceIdx] == '`' && arg[lastSliceIdx - 1] == '`') ||
(arg[firstSliceIdx] == '"' && arg[lastSliceIdx - 1] == '"'))
{
++firstSliceIdx;
--lastSliceIdx;
SimaTian marked this conversation as resolved.
Show resolved Hide resolved
removedQuotes = true;

// Check yet again if we're still in the correct slice boundaries, this could've changed if we've trimmed.
if (firstSliceIdx > slices[firstSlice].Item2 && ++firstSlice < lastSlice)
SimaTian marked this conversation as resolved.
Show resolved Hide resolved
{
arguments.Add(argValue);
firstSliceIdx = slices[firstSlice].Item1;
}
if (slices[lastSlice].Item1 > lastSliceIdx && firstSlice < --lastSlice)
SimaTian marked this conversation as resolved.
Show resolved Hide resolved
{
lastSliceIdx = slices[lastSlice].Item2;
}
}

string argValue = "";

while (firstSlice < lastSlice)
SimaTian marked this conversation as resolved.
Show resolved Hide resolved
{
argValue += arg.Substring(firstSliceIdx, slices[firstSlice].Item2 - firstSliceIdx);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am a bit confused about why this would improve perf; I guess there are usually few arguments. In general, repeated concatenations are a perf antipattern. Couldn't StringTools.SpanBasedStringBuilder be optimized instead?

Copy link
Member Author

@SimaTian SimaTian Dec 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From what I saw/googled, concatenation should be faster than, or similar in speed to, a StringBuilder for up to 3-4 concatenations; after that the StringBuilder is faster.
Most resolved variables have 1 or 2 slices to concatenate (beyond the initial empty string), so I opted for simplicity.

There is an argument to be made for splitting on the number of slices and using a StringBuilder for 3+.
As for optimizing SpanBasedStringBuilder - that is not our code but something from Microsoft.NET.StringTools.
I think the main difference is that, for most cases, the span-based string builder is overkill.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As for the perf improvement, the first two lines of the previous version of the function were doing this:

  argumentBuilder.Trim();
  string argValue = argumentBuilder.ToString();

and then doing everything with String, throwing away any and all advantage the span-based string builder might have had.
So there is an option to kill most of my changes and replace the string with SpanBasedChar - it could achieve similar if not identical results. The main cost would probably be further profiling, since it's a rather large replacement once again.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After a bit more googling: having a static StringBuilder and reusing the instance should give the best of both worlds - reasonable performance for small concatenation counts while avoiding the danger of allocation.

firstSlice++;
firstSliceIdx = slices[firstSlice].Item1;
}

if (firstSlice == lastSlice)
{
argValue += arg.Substring(firstSliceIdx, lastSliceIdx - firstSliceIdx);
}

if (!removedQuotes && String.Equals("null", argValue, StringComparison.OrdinalIgnoreCase))
{
arguments.Add(null);
}
else
{
arguments.Add(argValue);
}
}

/// <summary>
Expand All @@ -830,18 +875,14 @@ private static string[] ExtractFunctionArguments(IElementLocation elementLocatio

List<string> arguments = new List<string>();

using SpanBasedStringBuilder argumentBuilder = Strings.GetSpanBasedStringBuilder();
int? argumentStartIndex = null;
int abStart = -1;
SimaTian marked this conversation as resolved.
Show resolved Hide resolved
List<Tuple<int, int>> slices = new List<Tuple<int, int>>();
SimaTian marked this conversation as resolved.
Show resolved Hide resolved

// We iterate over the string in the for loop below. When we find an argument, instead of adding it to the argument
// builder one-character-at-a-time, we remember the start index and then call this function when we find the end of
// the argument. This appends the entire {start, end} span to the builder in one call.
void FlushCurrentArgumentToArgumentBuilder(int argumentEndIndex)
{
if (argumentStartIndex.HasValue)
void FlushToSlices(int argumentEndIndex) {
if (abStart != -1)
{
argumentBuilder.Append(argumentsString, argumentStartIndex.Value, argumentEndIndex - argumentStartIndex.Value);
argumentStartIndex = null;
slices.Add(Tuple.Create(abStart, argumentEndIndex));
abStart = -1;
}
}

Expand All @@ -863,8 +904,8 @@ void FlushCurrentArgumentToArgumentBuilder(int argumentEndIndex)
ProjectErrorUtilities.ThrowInvalidProject(elementLocation, "InvalidFunctionPropertyExpression", expressionFunction, AssemblyResources.GetString("InvalidFunctionPropertyExpressionDetailMismatchedParenthesis"));
}

FlushCurrentArgumentToArgumentBuilder(argumentEndIndex: nestedPropertyStart);
argumentBuilder.Append(argumentsString, nestedPropertyStart, (n - nestedPropertyStart) + 1);
FlushToSlices(nestedPropertyStart);
slices.Add(Tuple.Create(nestedPropertyStart, n + 1));
}
else if (argumentsString[n] == '`' || argumentsString[n] == '"' || argumentsString[n] == '\'')
{
Expand All @@ -878,32 +919,36 @@ void FlushCurrentArgumentToArgumentBuilder(int argumentEndIndex)
ProjectErrorUtilities.ThrowInvalidProject(elementLocation, "InvalidFunctionPropertyExpression", expressionFunction, AssemblyResources.GetString("InvalidFunctionPropertyExpressionDetailMismatchedQuote"));
}

FlushCurrentArgumentToArgumentBuilder(argumentEndIndex: quoteStart);
argumentBuilder.Append(argumentsString, quoteStart, (n - quoteStart) + 1);
FlushToSlices(quoteStart);
slices.Add(Tuple.Create(quoteStart, n + 1));
}
else if (argumentsString[n] == ',')
{
FlushCurrentArgumentToArgumentBuilder(argumentEndIndex: n);
FlushToSlices(n);

// We have reached the end of the current argument, go ahead and add it
// to our list
AddArgument(arguments, argumentBuilder);
AddArgumentFromSlices(arguments, slices, argumentsString);

// Clear out the argument builder ready for the next argument
argumentBuilder.Clear();
slices.Clear();
}
else
{
argumentStartIndex ??= n;
// argumentStartIndex ??= n;
SimaTian marked this conversation as resolved.
Show resolved Hide resolved
if (abStart == -1)
{
abStart = n;
}
}
}

// We reached the end of the string but we may have seen the start but not the end of the last (or only) argument so flush it now.
FlushCurrentArgumentToArgumentBuilder(argumentEndIndex: argumentsContentLength);
FlushToSlices(argumentsContentLength);

// This will either be the one and only argument, or the last one
// so add it to our list
AddArgument(arguments, argumentBuilder);
AddArgumentFromSlices(arguments, slices, argumentsString);

return arguments.ToArray();
}
Expand Down Expand Up @@ -2012,6 +2057,23 @@ internal static bool ExpandExpressionCapture<S>(
// If there are no items of the given type, then bail out early
if (itemsOfType.Count == 0)
{
if (expressionCapture.Captures?.Any(capture =>
{
if (string.Equals(capture.FunctionName, "Count", StringComparison.OrdinalIgnoreCase))
{
return true;
}
if (string.Equals(capture.FunctionName, "AnyHaveMetadataValue", StringComparison.OrdinalIgnoreCase))
SimaTian marked this conversation as resolved.
Show resolved Hide resolved
{
return true;
}
return false;
}) != true)
SimaTian marked this conversation as resolved.
Show resolved Hide resolved
{
itemsFromCapture = new List<KeyValuePair<string, S>>();
return false;
}
/*
// ... but only if there isn't a function "Count", since that will want to return something (zero) for an empty list
if (expressionCapture.Captures?.Any(capture => string.Equals(capture.FunctionName, "Count", StringComparison.OrdinalIgnoreCase)) != true)
{
Expand All @@ -2022,6 +2084,7 @@ internal static bool ExpandExpressionCapture<S>(
return false;
}
}
*/
}

if (expressionCapture.Captures != null)
Expand Down
10 changes: 8 additions & 2 deletions src/StringTools/WeakStringCache.Concurrent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ namespace Microsoft.NET.StringTools
internal sealed partial class WeakStringCache : IDisposable
{
private readonly ConcurrentDictionary<int, StringWeakHandle> _stringsByHashCode;
private int _count;
JanKrivanek marked this conversation as resolved.
Show resolved Hide resolved
JanProvaznik marked this conversation as resolved.
Show resolved Hide resolved

public WeakStringCache()
{
Expand Down Expand Up @@ -62,11 +63,15 @@ public string GetOrCreateEntry(ref InternableString internable, out bool cacheHi

handle = new StringWeakHandle();
handle.SetString(result);
_stringsByHashCode.TryAdd(hashCode, handle);
if (_stringsByHashCode.TryAdd(hashCode, handle))
{
Interlocked.Add(ref _count, 1);
}


// Remove unused handles if our heuristic indicates that it would be productive.
int scavengeThreshold = _scavengeThreshold;
if (_stringsByHashCode.Count >= scavengeThreshold)
if (_count >= scavengeThreshold)
{
// Before we start scavenging set _scavengeThreshold to a high value to effectively lock other threads from
// running Scavenge at the same time.
Expand All @@ -81,6 +86,7 @@ public string GetOrCreateEntry(ref InternableString internable, out bool cacheHi
{
// And do this again when the number of handles reaches double the current after-scavenge number.
_scavengeThreshold = _stringsByHashCode.Count * 2;
_count = _stringsByHashCode.Count;
}
}
}
Expand Down