Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove unnecessary allocations in Hash task. #7162

Merged
merged 11 commits into from
Jan 21, 2022
72 changes: 72 additions & 0 deletions src/Tasks.UnitTests/Hash_Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,78 @@ public void HashTaskEmptyInputTest()
Assert.Null(zeroLengthItemsHash);
}

[Fact]
public void HashTaskLargeInputCountTest()
{
    // The expected value below was computed ahead of time; a change to the
    // hashing implementation may require it to be regenerated.
    const string expectedHash = "8a996bbcb5e481981c2fba7ac408e20d0b4360a5";
    const int itemCount = 1000;

    // Many small items: "Item0", "Item1", ..., "Item999".
    var itemsToHash = new ITaskItem[itemCount];
    for (int index = 0; index < itemCount; index++)
    {
        itemsToHash[index] = new TaskItem($"Item{index}");
    }

    Assert.Equal(expectedHash, ExecuteHashTask(itemsToHash));
}

[Fact]
public void HashTaskLargeInputSizeTest()
{
    // The expected value below was computed ahead of time; a change to the
    // hashing implementation may require it to be regenerated.
    const string expectedHash = "0509142dd3d3a733f30a52a0eec37cd727d46122";
    const int partCount = 1000;

    // One single item whose ItemSpec is "Item0Item1...Item999".
    var concatenated = new System.Text.StringBuilder();
    for (int index = 0; index < partCount; index++)
    {
        concatenated.Append($"Item{index}");
    }

    ITaskItem[] itemsToHash = { new TaskItem(concatenated.ToString()) };

    Assert.Equal(expectedHash, ExecuteHashTask(itemsToHash));
}

#pragma warning disable CA5350
// Verifies that the task produces the right hash for every input length from 1 to 2000 characters.
// The goal is to exercise the edges of the internal buffers regardless of the size of those buffers.
[Fact]
public void HashTaskDifferentInputSizesTest()
{
    const int maxInputSize = 2000;

    // Formats a hash as lowercase hexadecimal, two digits per byte.
    static string ToLowerHex(byte[] bytes)
    {
        var hex = new System.Text.StringBuilder(bytes.Length * 2);
        foreach (byte value in bytes)
        {
            hex.Append(value.ToString("x2"));
        }
        return hex.ToString();
    }

    using var sha1 = System.Security.Cryptography.SHA1.Create();
    MockEngine mockEngine = new();

    for (int length = 1; length <= maxInputSize; length++)
    {
        string input = new string('a', length);

        Hash hashTask = new()
        {
            BuildEngine = mockEngine,
            ItemsToHash = new ITaskItem[] { new TaskItem(input) },
            IgnoreCase = false
        };
        Assert.True(hashTask.Execute());

        // Reference value: SHA1 over the UTF-8 bytes of the input followed by the '\u2028' character.
        byte[] referenceBytes = System.Text.Encoding.UTF8.GetBytes(input + '\u2028');
        string expectedHash = ToLowerHex(sha1.ComputeHash(referenceBytes));

        Assert.Equal(expectedHash, hashTask.HashResult);
    }
}
#pragma warning restore CA5350

[Fact]
public void HashTaskIgnoreCaseTest()
{
Expand Down
109 changes: 83 additions & 26 deletions src/Tasks/Hash.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
using System.Security.Cryptography;
using System.Text;
using Microsoft.Build.Framework;
using Microsoft.Build.Shared;

namespace Microsoft.Build.Tasks
{
Expand All @@ -19,6 +18,16 @@ namespace Microsoft.Build.Tasks
public class Hash : TaskExtension
{
private const char ItemSeparatorCharacter = '\u2028';
private static readonly Encoding s_encoding = Encoding.UTF8;
private static readonly byte[] s_itemSeparatorCharacterBytes = s_encoding.GetBytes(new char[] { ItemSeparatorCharacter });

// Size of the buffer in which the bytes of the strings accumulate until sha1.TransformBlock is run on them.
// The value balances the number of costly sha1.TransformBlock calls against the amount of allocated memory.
private const int Sha1BufferSize = 512;

// Maximum number of characters in each chunk that an ItemSpec is cut into.
// This length was chosen so that the itemSpecChunkByteBuffer rented from ArrayPool is close to, but not bigger than, 512 bytes
// (for UTF-8, GetMaxByteCount(169) is (169 + 1) * 3 = 510).
private const int MaxInputChunkLength = 169;

/// <summary>
/// Items from which to generate a hash.
Expand Down Expand Up @@ -46,52 +55,100 @@ public override bool Execute()
{
using (var sha1 = SHA1.Create())
{
var concatenatedItemStringSize = ComputeStringSize(ItemsToHash);
// Buffer in which bytes of the strings are to be stored until their number reaches the limit size.
// Once the limit is reached, the sha1.TransformBlock is to be run on all the bytes of this buffer.
byte[] sha1Buffer = null;

var hashStringSize = sha1.HashSize;
// Buffer in which bytes of items' ItemSpec are to be stored.
byte[] itemSpecChunkByteBuffer = null;

using (var stringBuilder = new ReuseableStringBuilder(Math.Max(concatenatedItemStringSize, hashStringSize)))
try
{
foreach (var item in ItemsToHash)
// Rent both working buffers from the shared pool; the finally block below returns them.
sha1Buffer = System.Buffers.ArrayPool<byte>.Shared.Rent(Sha1BufferSize);
itemSpecChunkByteBuffer = System.Buffers.ArrayPool<byte>.Shared.Rent(s_encoding.GetMaxByteCount(MaxInputChunkLength));

// Number of bytes currently pending in sha1Buffer.
int sha1BufferPosition = 0;
for (int i = 0; i < ItemsToHash.Length; i++)
{
string itemSpec = item.ItemSpec;
stringBuilder.Append(IgnoreCase ? itemSpec.ToUpperInvariant() : itemSpec);
stringBuilder.Append(ItemSeparatorCharacter);
}
string itemSpec = IgnoreCase ? ItemsToHash[i].ItemSpec.ToUpperInvariant() : ItemsToHash[i].ItemSpec;

var hash = sha1.ComputeHash(Encoding.UTF8.GetBytes(stringBuilder.ToString()));
// Slice the itemSpec string into chunks of reasonable size and add them to sha1 buffer.
for (int itemSpecPosition = 0; itemSpecPosition < itemSpec.Length; itemSpecPosition += MaxInputChunkLength)
{
// Encode at most MaxInputChunkLength characters of itemSpec into the chunk buffer.
int charsToProcess = Math.Min(itemSpec.Length - itemSpecPosition, MaxInputChunkLength);
int byteCount = s_encoding.GetBytes(itemSpec, itemSpecPosition, charsToProcess, itemSpecChunkByteBuffer, 0);

stringBuilder.Clear();
sha1BufferPosition = AddBytesToSha1Buffer(sha1, sha1Buffer, sha1BufferPosition, Sha1BufferSize, itemSpecChunkByteBuffer, byteCount);
}

foreach (var b in hash)
{
stringBuilder.Append(b.ToString("x2"));
// Append the item separator bytes after the bytes of each item's ItemSpec.
sha1BufferPosition = AddBytesToSha1Buffer(sha1, sha1Buffer, sha1BufferPosition, Sha1BufferSize, s_itemSeparatorCharacterBytes, s_itemSeparatorCharacterBytes.Length);
}

HashResult = stringBuilder.ToString();
// Hash the bytes still pending in sha1Buffer and finalize the computation.
sha1.TransformFinalBlock(sha1Buffer, 0, sha1BufferPosition);

using (var stringBuilder = new ReuseableStringBuilder(sha1.HashSize))
{
// Render the computed hash as lowercase hexadecimal, two digits per byte.
foreach (var b in sha1.Hash)
{
stringBuilder.Append(b.ToString("x2"));
}
HashResult = stringBuilder.ToString();
}
}
finally
{
// Return the rented buffers; either may still be null if renting did not complete.
if (sha1Buffer != null)
{
System.Buffers.ArrayPool<byte>.Shared.Return(sha1Buffer);
}
if (itemSpecChunkByteBuffer != null)
{
System.Buffers.ArrayPool<byte>.Shared.Return(itemSpecChunkByteBuffer);
}
}
}
}

return true;
}

private int ComputeStringSize(ITaskItem[] itemsToHash)
/// <summary>
/// Add bytes to the sha1 buffer. Once the limit size is reached, sha1.TransformBlock is called and the buffer is flushed.
/// </summary>
/// <param name="sha1">Hashing algorithm sha1.</param>
/// <param name="sha1Buffer">The sha1 buffer which stores bytes of the strings. Bytes should be added to this buffer.</param>
/// <param name="sha1BufferPosition">Number of used bytes of the sha1 buffer.</param>
/// <param name="sha1BufferSize">The size of sha1 buffer.</param>
/// <param name="byteBuffer">Bytes buffer which contains bytes to be written to sha1 buffer.</param>
/// <param name="byteCount">Amount of bytes that are to be added to sha1 buffer.</param>
/// <returns>Updated sha1BufferPosition.</returns>
private int AddBytesToSha1Buffer(SHA1 sha1, byte[] sha1Buffer, int sha1BufferPosition, int sha1BufferSize, byte[] byteBuffer, int byteCount)
{
if (itemsToHash.Length == 0)
// Offset into byteBuffer of the first byte that has not been consumed yet.
int bytesProcessed = 0;
// While the pending bytes plus the remaining incoming bytes fill at least one whole buffer, hash one full block.
while (sha1BufferPosition + byteCount >= sha1BufferSize)
{
return 0;
}
int sha1BufferFreeSpace = sha1BufferSize - sha1BufferPosition;

var totalItemSize = 0;
if (sha1BufferPosition == 0)
{
// If sha1 buffer is empty and bytes number is big enough there is no need to copy bytes to sha1 buffer.
// Pass the bytes to TransformBlock right away.
sha1.TransformBlock(byteBuffer, bytesProcessed, sha1BufferSize, null, 0);
}
else
{
// Top up the partially filled sha1 buffer, hash it as one block, and mark it as empty again.
Array.Copy(byteBuffer, bytesProcessed, sha1Buffer, sha1BufferPosition, sha1BufferFreeSpace);
sha1.TransformBlock(sha1Buffer, 0, sha1BufferSize, null, 0);
sha1BufferPosition = 0;
}

foreach (var item in itemsToHash)
{
totalItemSize += item.ItemSpec.Length;
// Account for the bytes of byteBuffer consumed by the block that was just hashed.
bytesProcessed += sha1BufferFreeSpace;
byteCount -= sha1BufferFreeSpace;
}

// Add one ItemSeparatorCharacter per item
return totalItemSize + itemsToHash.Length;
// Fewer bytes than a full block remain; keep them in sha1Buffer for a later call.
Array.Copy(byteBuffer, bytesProcessed, sha1Buffer, sha1BufferPosition, byteCount);
sha1BufferPosition += byteCount;

return sha1BufferPosition;
}
}
}