diff --git a/src/Tasks.UnitTests/Hash_Tests.cs b/src/Tasks.UnitTests/Hash_Tests.cs
index e2b64378d47..9d0aed2e921 100644
--- a/src/Tasks.UnitTests/Hash_Tests.cs
+++ b/src/Tasks.UnitTests/Hash_Tests.cs
@@ -43,6 +43,78 @@ public void HashTaskEmptyInputTest()
             Assert.Null(zeroLengthItemsHash);
         }
 
+        [Fact]
+        public void HashTaskLargeInputCountTest()
+        {
+            // Arrange: 1000 separate items whose ItemSpecs are Item0 .. Item999.
+            const int itemCount = 1000;
+            var items = new ITaskItem[itemCount];
+            for (int index = 0; index < itemCount; index++)
+            {
+                items[index] = new TaskItem($"Item{index}");
+            }
+
+            // This hash was pre-computed. If the implementation changes it may need to be adjusted.
+            const string precomputedHash = "8a996bbcb5e481981c2fba7ac408e20d0b4360a5";
+            Assert.Equal(precomputedHash, ExecuteHashTask(items));
+        }
+
+        [Fact]
+        public void HashTaskLargeInputSizeTest()
+        {
+            // Arrange: a single item whose ItemSpec is Item0Item1...Item999 joined without any separator.
+            var fragments = new string[1000];
+            for (int index = 0; index < fragments.Length; index++)
+            {
+                fragments[index] = $"Item{index}";
+            }
+
+            var singleLargeItem = new ITaskItem[] { new TaskItem(string.Concat(fragments)) };
+
+            // This hash was pre-computed. If the implementation changes it may need to be adjusted.
+            const string precomputedHash = "0509142dd3d3a733f30a52a0eec37cd727d46122";
+            Assert.Equal(precomputedHash, ExecuteHashTask(singleLargeItem));
+        }
+
+#pragma warning disable CA5350
+        // This test verifies that the hash is computed correctly for inputs of many different lengths.
+        // We want to cover the buffer-boundary cases regardless of the size of the buffers the task uses.
+        [Fact]
+        public void HashTaskDifferentInputSizesTest()
+        {
+            const int longestInput = 2000;
+            MockEngine engine = new();
+            using (var referenceSha1 = System.Security.Cryptography.SHA1.Create())
+            {
+                var hex = new System.Text.StringBuilder(referenceSha1.HashSize);
+                for (int length = 1; length <= longestInput; length++)
+                {
+                    // Inputs are "a", "aa", ... up to longestInput characters.
+                    string text = new string('a', length);
+
+                    // Reference value: SHA1 over the UTF-8 bytes of the input followed by '\u2028', rendered as lowercase hex.
+                    byte[] digest = referenceSha1.ComputeHash(System.Text.Encoding.UTF8.GetBytes(text + '\u2028'));
+                    hex.Clear();
+                    foreach (byte digestByte in digest)
+                    {
+                        hex.Append(digestByte.ToString("x2"));
+                    }
+                    string expectedHash = hex.ToString();
+
+                    Hash task = new()
+                    {
+                        BuildEngine = engine,
+                        ItemsToHash = new ITaskItem[] { new TaskItem(text) },
+                        IgnoreCase = false
+                    };
+                    Assert.True(task.Execute());
+
+                    Assert.Equal(expectedHash, task.HashResult);
+                }
+            }
+        }
+#pragma warning restore CA5350
+
         [Fact]
         public void HashTaskIgnoreCaseTest()
         {
diff --git a/src/Tasks/Hash.cs b/src/Tasks/Hash.cs
index 81699764e51..b4beb876015 100644
--- a/src/Tasks/Hash.cs
+++ b/src/Tasks/Hash.cs
@@ -5,7 +5,6 @@
 using System.Security.Cryptography;
 using System.Text;
 using Microsoft.Build.Framework;
-using Microsoft.Build.Shared;
 
 namespace Microsoft.Build.Tasks
 {
@@ -19,6 +18,16 @@ namespace Microsoft.Build.Tasks
     public class Hash : TaskExtension
     {
         private const char ItemSeparatorCharacter = '\u2028';
+        private static readonly Encoding s_encoding = Encoding.UTF8;
+        private static readonly byte[] s_itemSeparatorCharacterBytes = s_encoding.GetBytes(new char[] { ItemSeparatorCharacter });
+
+        // Size of buffer where bytes of the strings are stored until sha1.TransformBlock is to be run on them.
+        // The size is a balance between the number of costly sha1.TransformBlock calls and the amount of allocated memory.
+        private const int Sha1BufferSize = 512;
+
+        // Size of chunks in which ItemSpecs would be cut.
+        // We have chosen this length so that itemSpecChunkByteBuffer rented from ArrayPool is close to, but not bigger than, 512 bytes.
+        private const int MaxInputChunkLength = 169;
 
         /// <summary>
         /// Items from which to generate a hash.
@@ -46,52 +55,115 @@ public override bool Execute()
             {
                 using (var sha1 = SHA1.Create())
                 {
-                    var concatenatedItemStringSize = ComputeStringSize(ItemsToHash);
+                    // Buffer in which bytes of the strings are to be stored until their number reaches the limit size.
+                    // Once the limit is reached, the sha1.TransformBlock is to be run on all the bytes of this buffer.
+                    byte[] sha1Buffer = null;
 
-                    var hashStringSize = sha1.HashSize;
+                    // Buffer in which bytes of items' ItemSpec are to be stored.
+                    byte[] itemSpecChunkByteBuffer = null;
 
-                    using (var stringBuilder = new ReuseableStringBuilder(Math.Max(concatenatedItemStringSize, hashStringSize)))
+                    try
                     {
-                        foreach (var item in ItemsToHash)
+                        sha1Buffer = System.Buffers.ArrayPool<byte>.Shared.Rent(Sha1BufferSize);
+                        itemSpecChunkByteBuffer = System.Buffers.ArrayPool<byte>.Shared.Rent(s_encoding.GetMaxByteCount(MaxInputChunkLength));
+
+                        int sha1BufferPosition = 0;
+                        for (int i = 0; i < ItemsToHash.Length; i++)
                         {
-                            string itemSpec = item.ItemSpec;
-                            stringBuilder.Append(IgnoreCase ? itemSpec.ToUpperInvariant() : itemSpec);
-                            stringBuilder.Append(ItemSeparatorCharacter);
-                        }
+                            string itemSpec = IgnoreCase ? ItemsToHash[i].ItemSpec.ToUpperInvariant() : ItemsToHash[i].ItemSpec;
 
-                        var hash = sha1.ComputeHash(Encoding.UTF8.GetBytes(stringBuilder.ToString()));
+                            // Slice the itemSpec string into chunks of reasonable size and add them to sha1 buffer.
+                            // A chunk must not end between the two halves of a surrogate pair: encoding the halves separately
+                            // would replace each of them with U+FFFD and yield a different hash than encoding the whole string.
+                            int itemSpecPosition = 0;
+                            while (itemSpecPosition < itemSpec.Length)
+                            {
+                                int charsToProcess = Math.Min(itemSpec.Length - itemSpecPosition, MaxInputChunkLength);
+                                int chunkEnd = itemSpecPosition + charsToProcess;
+                                if (charsToProcess > 1 && chunkEnd < itemSpec.Length && char.IsHighSurrogate(itemSpec[chunkEnd - 1]))
+                                {
+                                    // Leave the high surrogate for the next chunk so it is encoded together with the char that follows it.
+                                    charsToProcess--;
+                                }
+
+                                int byteCount = s_encoding.GetBytes(itemSpec, itemSpecPosition, charsToProcess, itemSpecChunkByteBuffer, 0);
 
-                        stringBuilder.Clear();
+                                sha1BufferPosition = AddBytesToSha1Buffer(sha1, sha1Buffer, sha1BufferPosition, Sha1BufferSize, itemSpecChunkByteBuffer, byteCount);
+                                itemSpecPosition += charsToProcess;
+                            }
 
-                        foreach (var b in hash)
-                        {
-                            stringBuilder.Append(b.ToString("x2"));
+                            // Append the encoded item separator after every item.
+                            sha1BufferPosition = AddBytesToSha1Buffer(sha1, sha1Buffer, sha1BufferPosition, Sha1BufferSize, s_itemSeparatorCharacterBytes, s_itemSeparatorCharacterBytes.Length);
                         }
 
-                        HashResult = stringBuilder.ToString();
+                        // Hash the bytes that are still pending in the sha1 buffer and finish the computation.
+                        sha1.TransformFinalBlock(sha1Buffer, 0, sha1BufferPosition);
+
+                        // Render the resulting hash bytes as lowercase hexadecimal.
+                        using (var stringBuilder = new ReuseableStringBuilder(sha1.HashSize))
+                        {
+                            foreach (var b in sha1.Hash)
+                            {
+                                stringBuilder.Append(b.ToString("x2"));
+                            }
+                            HashResult = stringBuilder.ToString();
+                        }
+                    }
+                    finally
+                    {
+                        // Return the rented buffers to the pool whether or not hashing succeeded.
+                        if (sha1Buffer != null)
+                        {
+                            System.Buffers.ArrayPool<byte>.Shared.Return(sha1Buffer);
+                        }
+                        if (itemSpecChunkByteBuffer != null)
+                        {
+                            System.Buffers.ArrayPool<byte>.Shared.Return(itemSpecChunkByteBuffer);
+                        }
                     }
                 }
             }
-
             return true;
         }
 
-        private int ComputeStringSize(ITaskItem[] itemsToHash)
+        /// <summary>
+        /// Add bytes to the sha1 buffer. Once the limit size is reached, sha1.TransformBlock is called and the buffer is flushed.
+        /// </summary>
+        /// <param name="sha1">Hashing algorithm sha1.</param>
+        /// <param name="sha1Buffer">The sha1 buffer which stores bytes of the strings. Bytes should be added to this buffer.</param>
+        /// <param name="sha1BufferPosition">Number of used bytes of the sha1 buffer.</param>
+        /// <param name="sha1BufferSize">The size of sha1 buffer.</param>
+        /// <param name="byteBuffer">Bytes buffer which contains bytes to be written to sha1 buffer.</param>
+        /// <param name="byteCount">Amount of bytes that are to be added to sha1 buffer.</param>
+        /// <returns>Updated sha1BufferPosition.</returns>
+        private int AddBytesToSha1Buffer(SHA1 sha1, byte[] sha1Buffer, int sha1BufferPosition, int sha1BufferSize, byte[] byteBuffer, int byteCount)
         {
-            if (itemsToHash.Length == 0)
+            int sourceOffset = 0;
+            while (sha1BufferSize <= sha1BufferPosition + byteCount)
             {
-                return 0;
-            }
+                int freeSpace = sha1BufferSize - sha1BufferPosition;
 
-            var totalItemSize = 0;
+                if (sha1BufferPosition != 0)
+                {
+                    Array.Copy(byteBuffer, sourceOffset, sha1Buffer, sha1BufferPosition, freeSpace);
+                    sha1.TransformBlock(sha1Buffer, 0, sha1BufferSize, null, 0);
+                    sha1BufferPosition = 0;
+                }
+                else
+                {
+                    // The sha1 buffer is empty and at least a full buffer of input is available,
+                    // so the input bytes are handed to TransformBlock directly instead of being copied first.
+                    sha1.TransformBlock(byteBuffer, sourceOffset, sha1BufferSize, null, 0);
+                }
 
-            foreach (var item in itemsToHash)
-            {
-                totalItemSize += item.ItemSpec.Length;
+                byteCount -= freeSpace;
+                sourceOffset += freeSpace;
             }
 
-            // Add one ItemSeparatorCharacter per item
-            return totalItemSize + itemsToHash.Length;
+            // Fewer bytes than a full buffer remain: keep them in the sha1 buffer for a later call.
+            Array.Copy(byteBuffer, sourceOffset, sha1Buffer, sha1BufferPosition, byteCount);
+
+            return sha1BufferPosition + byteCount;
         }
     }
 }