Skip to content

Commit

Permalink
Remove unnecessary allocations in Hash task. (#7162)
Browse files Browse the repository at this point in the history
Fixes #7086

### Context
`Hash.Execute()` allocates a concatenated string that ends up on the large object heap. This allocation can be avoided without changing the resulting hash.

### Changes Made
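The hash computation in `Hash.Execute()` is rewritten to feed SHA1 incrementally from small buffers rented from `ArrayPool` instead of hashing one large concatenated string.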

### Testing
Unit tests & manual testing
  • Loading branch information
AR-May authored Jan 21, 2022
1 parent 8872ed6 commit f69c8fb
Show file tree
Hide file tree
Showing 2 changed files with 155 additions and 26 deletions.
72 changes: 72 additions & 0 deletions src/Tasks.UnitTests/Hash_Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,78 @@ public void HashTaskEmptyInputTest()
Assert.Null(zeroLengthItemsHash);
}

/// <summary>
/// Hashes a large number of small items ("Item0", "Item1", ...) and compares the result with a pre-computed hash.
/// </summary>
[Fact]
public void HashTaskLargeInputCountTest()
{
    const int itemCount = 1000;

    // Pre-computed value; it may need to be adjusted if the hashing implementation changes.
    const string expectedHash = "8a996bbcb5e481981c2fba7ac408e20d0b4360a5";

    var itemsToHash = new ITaskItem[itemCount];
    for (int index = 0; index < itemCount; index++)
    {
        string itemSpec = $"Item{index}";
        itemsToHash[index] = new TaskItem(itemSpec);
    }

    var actualHash = ExecuteHashTask(itemsToHash);

    Assert.Equal(expectedHash, actualHash);
}

/// <summary>
/// Hashes a single item whose ItemSpec is one long string ("Item0Item1...Item999")
/// and compares the result with a pre-computed hash.
/// </summary>
[Fact]
public void HashTaskLargeInputSizeTest()
{
    const int partCount = 1000;

    // Pre-computed value; it may need to be adjusted if the hashing implementation changes.
    const string expectedHash = "0509142dd3d3a733f30a52a0eec37cd727d46122";

    var parts = new string[partCount];
    for (int index = 0; index < partCount; index++)
    {
        parts[index] = $"Item{index}";
    }

    // All parts are glued together without any separator into one large ItemSpec.
    string largeItemSpec = string.Concat(parts);
    ITaskItem[] itemsToHash = { new TaskItem(largeItemSpec) };

    var actualHash = ExecuteHashTask(itemsToHash);

    Assert.Equal(expectedHash, actualHash);
}

#pragma warning disable CA5350
// Verifies that the hash is computed correctly for every input length from 1 to 2000 characters.
// The goal is to cover inputs that end at or near a buffer boundary, regardless of the size of the buffer.
[Fact]
public void HashTaskDifferentInputSizesTest()
{
    const int maxInputSize = 2000;

    using var sha1 = System.Security.Cryptography.SHA1.Create();
    var hexBuilder = new System.Text.StringBuilder(sha1.HashSize);
    MockEngine mockEngine = new();

    for (int length = 1; length <= maxInputSize; length++)
    {
        // Input of the current length consisting only of the character 'a'.
        string input = new string('a', length);

        // Actual value: run the Hash task on a single item holding the input.
        Hash hashTask = new()
        {
            BuildEngine = mockEngine,
            ItemsToHash = new ITaskItem[] { new TaskItem(input) },
            IgnoreCase = false
        };
        Assert.True(hashTask.Execute());
        string actualHash = hashTask.HashResult;

        // Expected value: SHA1 over the UTF-8 bytes of the input followed by the '\u2028' character,
        // rendered as lowercase hexadecimal.
        byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(input + '\u2028');
        byte[] expectedBytes = sha1.ComputeHash(inputBytes);
        hexBuilder.Clear();
        foreach (byte hashByte in expectedBytes)
        {
            hexBuilder.Append(hashByte.ToString("x2"));
        }
        string expectedHash = hexBuilder.ToString();

        Assert.Equal(expectedHash, actualHash);
    }
}
#pragma warning restore CA5350

[Fact]
public void HashTaskIgnoreCaseTest()
{
Expand Down
109 changes: 83 additions & 26 deletions src/Tasks/Hash.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
using System.Security.Cryptography;
using System.Text;
using Microsoft.Build.Framework;
using Microsoft.Build.Shared;

#nullable disable

Expand All @@ -24,6 +23,16 @@ namespace Microsoft.Build.Tasks
public class Hash : TaskExtension
{
private const char ItemSeparatorCharacter = '\u2028';
private static readonly Encoding s_encoding = Encoding.UTF8;
private static readonly byte[] s_itemSeparatorCharacterBytes = s_encoding.GetBytes(new char[] { ItemSeparatorCharacter });

// Size of the buffer in which the bytes of the strings are accumulated before sha1.TransformBlock is run on them.
// The size is a trade-off between the number of costly sha1.TransformBlock calls and the amount of allocated memory.
private const int Sha1BufferSize = 512;

// Maximum number of characters in each chunk that an ItemSpec is cut into.
// This length was chosen so that the itemSpecChunkByteBuffer rented from ArrayPool is close to, but not bigger than, 512 bytes.
private const int MaxInputChunkLength = 169;

/// <summary>
/// Items from which to generate a hash.
Expand Down Expand Up @@ -52,52 +61,100 @@ public override bool Execute()
{
using (var sha1 = SHA1.Create())
{
var concatenatedItemStringSize = ComputeStringSize(ItemsToHash);
// Buffer in which bytes of the strings are to be stored until their number reaches the limit size.
// Once the limit is reached, the sha1.TransformBlock is to be run on all the bytes of this buffer.
byte[] sha1Buffer = null;

var hashStringSize = sha1.HashSize;
// Buffer in which bytes of items' ItemSpec are to be stored.
byte[] itemSpecChunkByteBuffer = null;

using (var stringBuilder = new ReuseableStringBuilder(Math.Max(concatenatedItemStringSize, hashStringSize)))
try
{
foreach (var item in ItemsToHash)
sha1Buffer = System.Buffers.ArrayPool<byte>.Shared.Rent(Sha1BufferSize);
itemSpecChunkByteBuffer = System.Buffers.ArrayPool<byte>.Shared.Rent(s_encoding.GetMaxByteCount(MaxInputChunkLength));

int sha1BufferPosition = 0;
for (int i = 0; i < ItemsToHash.Length; i++)
{
string itemSpec = item.ItemSpec;
stringBuilder.Append(IgnoreCase ? itemSpec.ToUpperInvariant() : itemSpec);
stringBuilder.Append(ItemSeparatorCharacter);
}
string itemSpec = IgnoreCase ? ItemsToHash[i].ItemSpec.ToUpperInvariant() : ItemsToHash[i].ItemSpec;

var hash = sha1.ComputeHash(Encoding.UTF8.GetBytes(stringBuilder.ToString()));
// Slice the itemSpec string into chunks of reasonable size and add them to sha1 buffer.
for (int itemSpecPosition = 0; itemSpecPosition < itemSpec.Length; itemSpecPosition += MaxInputChunkLength)
{
int charsToProcess = Math.Min(itemSpec.Length - itemSpecPosition, MaxInputChunkLength);
int byteCount = s_encoding.GetBytes(itemSpec, itemSpecPosition, charsToProcess, itemSpecChunkByteBuffer, 0);

stringBuilder.Clear();
sha1BufferPosition = AddBytesToSha1Buffer(sha1, sha1Buffer, sha1BufferPosition, Sha1BufferSize, itemSpecChunkByteBuffer, byteCount);
}

foreach (var b in hash)
{
stringBuilder.Append(b.ToString("x2"));
sha1BufferPosition = AddBytesToSha1Buffer(sha1, sha1Buffer, sha1BufferPosition, Sha1BufferSize, s_itemSeparatorCharacterBytes, s_itemSeparatorCharacterBytes.Length);
}

HashResult = stringBuilder.ToString();
sha1.TransformFinalBlock(sha1Buffer, 0, sha1BufferPosition);

using (var stringBuilder = new ReuseableStringBuilder(sha1.HashSize))
{
foreach (var b in sha1.Hash)
{
stringBuilder.Append(b.ToString("x2"));
}
HashResult = stringBuilder.ToString();
}
}
finally
{
if (sha1Buffer != null)
{
System.Buffers.ArrayPool<byte>.Shared.Return(sha1Buffer);
}
if (itemSpecChunkByteBuffer != null)
{
System.Buffers.ArrayPool<byte>.Shared.Return(itemSpecChunkByteBuffer);
}
}
}
}

return true;
}

private int ComputeStringSize(ITaskItem[] itemsToHash)
/// <summary>
/// Adds bytes to the sha1 buffer. Whenever the buffered bytes plus the incoming bytes reach the buffer size,
/// sha1.TransformBlock is called on a full buffer's worth of bytes and the buffer is flushed.
/// </summary>
/// <remarks>
/// Bytes that do not fill a whole buffer are copied into <paramref name="sha1Buffer"/> and stay there;
/// the returned position tells the caller how many bytes of the buffer are in use.
/// </remarks>
/// <param name="sha1">Hashing algorithm sha1.</param>
/// <param name="sha1Buffer">The sha1 buffer which stores bytes of the strings. Bytes should be added to this buffer.</param>
/// <param name="sha1BufferPosition">Number of used bytes of the sha1 buffer.</param>
/// <param name="sha1BufferSize">The size of sha1 buffer.</param>
/// <param name="byteBuffer">Bytes buffer which contains bytes to be written to sha1 buffer.</param>
/// <param name="byteCount">Amount of bytes that are to be added to sha1 buffer.</param>
/// <returns>Updated sha1BufferPosition.</returns>
private int AddBytesToSha1Buffer(SHA1 sha1, byte[] sha1Buffer, int sha1BufferPosition, int sha1BufferSize, byte[] byteBuffer, int byteCount)
{
// NOTE(review): this text comes from a rendered diff. The statements below that refer to itemsToHash and
// totalItemSize look like leftovers of the removed ComputeStringSize method, interleaved with the new body.
// Confirm against the actual Hash.cs before relying on this listing.
if (itemsToHash.Length == 0)
// Offset into byteBuffer of the first byte that has not yet been hashed or copied.
int bytesProcessed = 0;
// Keep flushing while the buffered bytes plus the remaining input would fill the sha1 buffer.
while (sha1BufferPosition + byteCount >= sha1BufferSize)
{
return 0;
}
// Number of bytes still needed to fill the sha1 buffer completely.
int sha1BufferFreeSpace = sha1BufferSize - sha1BufferPosition;

var totalItemSize = 0;
if (sha1BufferPosition == 0)
{
// If sha1 buffer is empty and bytes number is big enough there is no need to copy bytes to sha1 buffer.
// Pass the bytes to TransformBlock right away.
sha1.TransformBlock(byteBuffer, bytesProcessed, sha1BufferSize, null, 0);
}
else
{
// Top up the partially filled sha1 buffer, hash the full buffer and start again from an empty buffer.
Array.Copy(byteBuffer, bytesProcessed, sha1Buffer, sha1BufferPosition, sha1BufferFreeSpace);
sha1.TransformBlock(sha1Buffer, 0, sha1BufferSize, null, 0);
sha1BufferPosition = 0;
}

foreach (var item in itemsToHash)
{
totalItemSize += item.ItemSpec.Length;
// In both branches above exactly sha1BufferFreeSpace bytes of byteBuffer were consumed.
bytesProcessed += sha1BufferFreeSpace;
byteCount -= sha1BufferFreeSpace;
}

// Add one ItemSeparatorCharacter per item
return totalItemSize + itemsToHash.Length;
// The remaining bytes no longer fill the buffer: store them in the sha1 buffer and advance the position.
Array.Copy(byteBuffer, bytesProcessed, sha1Buffer, sha1BufferPosition, byteCount);
sha1BufferPosition += byteCount;

return sha1BufferPosition;
}
}
}

0 comments on commit f69c8fb

Please sign in to comment.