Skip to content

Commit

Permalink
Cleanup BitArray code
Browse files Browse the repository at this point in the history
* This commit addresses some of the comments raised during the initial PR of the vectorised BitArray code, such as using new API/intrinsics. Also highlights a couple potential improvements.
  • Loading branch information
Gnbrkm41 committed Oct 1, 2020
1 parent 073edfc commit 6a6a650
Showing 1 changed file with 18 additions and 26 deletions.
44 changes: 18 additions & 26 deletions src/libraries/System.Collections/src/System/Collections/BitArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ public unsafe BitArray(bool[] values)
// Same logic as SSE2 path, however we lack MoveMask (equivalent) instruction
// As a workaround, mask out the relevant bit after comparison
// and combine by ORing all of them together (In this case, adding all of them does the same thing)

// Future opportunity: those two loads can be combined into a single `LDP` (See dotnet/runtime#37014)
Vector128<byte> lowerVector = AdvSimd.LoadVector128((byte*)ptr + i);
Vector128<byte> lowerIsFalse = AdvSimd.CompareEqual(lowerVector, zero);
Vector128<byte> bitsExtracted1 = AdvSimd.And(lowerIsFalse, s_bitMask128);
Expand Down Expand Up @@ -634,7 +636,7 @@ public unsafe BitArray Not()
uint i = 0;
if (Avx2.IsSupported)
{
Vector256<int> ones = Vector256.Create(-1);
Vector256<int> ones = Vector256<int>.AllBitsSet;
fixed (int* ptr = thisArray)
{
for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
Expand All @@ -646,7 +648,7 @@ public unsafe BitArray Not()
}
else if (Sse2.IsSupported)
{
Vector128<int> ones = Vector128.Create(-1);
Vector128<int> ones = Vector128<int>.AllBitsSet;
fixed (int* ptr = thisArray)
{
for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
Expand Down Expand Up @@ -839,11 +841,14 @@ public int Length
}
}

// The mask used when shuffling a single int into Vector128/256.
// The shuffle control mask used when shuffling a single int into Vector128/256.
// On little endian machines, the lower 8 bits of int belong in the first byte, next lower 8 in the second and so on.
// (And on big endian, upper 8 bits in the first byte, next upper 8 bits in the second...)
// We place the bytes that contain the bits to its respective byte so that we can mask out only the relevant bits later.
private static readonly Vector128<byte> s_lowerShuffleMask_CopyToBoolArray = Vector128.Create(0, 0x01010101_01010101).AsByte();
private static readonly Vector128<byte> s_upperShuffleMask_CopyToBoolArray = Vector128.Create(0x02020202_02020202, 0x03030303_03030303).AsByte();
private static readonly Vector128<byte> s_lowerShuffleMask_CopyToBoolArray =
BitConverter.IsLittleEndian ? Vector128.Create(0, 0x01010101_01010101).AsByte() : Vector128.Create(0x03030303_03030303, 0x02020202_02020202).AsByte();
private static readonly Vector128<byte> s_upperShuffleMask_CopyToBoolArray =
BitConverter.IsLittleEndian ? Vector128.Create(0x02020202_02020202, 0x03030303_03030303).AsByte() : Vector128.Create(0x01010101_01010101, 0).AsByte();

public unsafe void CopyTo(Array array, int index)
{
Expand Down Expand Up @@ -985,36 +990,23 @@ public unsafe void CopyTo(Array array, int index)
else if (AdvSimd.IsSupported)
{
Vector128<byte> ones = Vector128.Create((byte)1);
Vector128<byte> lowerIndices = s_lowerShuffleMask_CopyToBoolArray;
Vector128<byte> upperIndices = s_upperShuffleMask_CopyToBoolArray;

fixed (bool* destination = &boolArray[index])
{
// Future opportunity: those two stores can be combined into a single `STP` (See dotnet/runtime#33532)
for (; (i + Vector128ByteCount * 2u) <= (uint)m_length; i += Vector128ByteCount * 2u)
{
int bits = m_array[i / (uint)BitsPerInt32];
// Same logic as SSSE3 path, except we do not have Shuffle instruction.
// (TableVectorLookup could be an alternative - dotnet/runtime#1277)
// Instead we use chained ZIP1/2 instructions:
// (A0 is the byte containing LSB, A3 is the byte containing MSB)
// bits (on Big endian) - A3 A2 A1 A0
// bits (Little endian) / Byte reversal - A0 A1 A2 A3
// v1 = Vector128.Create - A0 A1 A2 A3 A0 A1 A2 A3 A0 A1 A2 A3 A0 A1 A2 A3
// v2 = ZipLow(v1, v1) - A0 A0 A1 A1 A2 A2 A3 A3 A0 A0 A1 A1 A2 A2 A3 A3
// v3 = ZipLow(v2, v2) - A0 A0 A0 A0 A1 A1 A1 A1 A2 A2 A2 A2 A3 A3 A3 A3
// shuffledLower = ZipLow(v3, v3) - A0 A0 A0 A0 A0 A0 A0 A0 A1 A1 A1 A1 A1 A1 A1 A1
// shuffledHigher = ZipHigh(v3, v3) - A2 A2 A2 A2 A2 A2 A2 A2 A3 A3 A3 A3 A3 A3 A3 A3
if (!BitConverter.IsLittleEndian)
{
bits = BinaryPrimitives.ReverseEndianness(bits);
}
Vector128<byte> vector = Vector128.Create(bits).AsByte();
vector = AdvSimd.Arm64.ZipLow(vector, vector);
vector = AdvSimd.Arm64.ZipLow(vector, vector);

Vector128<byte> shuffledLower = AdvSimd.Arm64.ZipLow(vector, vector);
Vector128<byte> vector = Vector128.CreateScalarUnsafe(bits).AsByte();

Vector128<byte> shuffledLower = AdvSimd.Arm64.VectorTableLookup(vector, lowerIndices);
Vector128<byte> extractedLower = AdvSimd.And(shuffledLower, s_bitMask128);
Vector128<byte> normalizedLower = AdvSimd.Min(extractedLower, ones);
AdvSimd.Store((byte*)destination + i, normalizedLower);

Vector128<byte> shuffledHigher = AdvSimd.Arm64.ZipHigh(vector, vector);
Vector128<byte> shuffledHigher = AdvSimd.Arm64.VectorTableLookup(vector, upperIndices);
Vector128<byte> extractedHigher = AdvSimd.And(shuffledHigher, s_bitMask128);
Vector128<byte> normalizedHigher = AdvSimd.Min(extractedHigher, ones);
AdvSimd.Store((byte*)destination + i + Vector128<byte>.Count, normalizedHigher);
Expand Down

0 comments on commit 6a6a650

Please sign in to comment.