Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unroll String.Equals for constant input [0..16] length #64821

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions src/coreclr/jit/inlinepolicy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -735,15 +735,6 @@ double DefaultPolicy::DetermineMultiplier()
JITDUMP("\nInline candidate has arg that feeds range check. Multiplier increased to %g.", multiplier);
}

if (m_ConstArgFeedsIsKnownConst || (m_ArgFeedsIsKnownConst && m_IsPrejitRoot))
{
// if we use RuntimeHelpers.IsKnownConstant we most likely expect our function to be always inlined
// at least in the case of constant arguments. In IsPrejitRoot we don't have callsite info so let's
// assume we have a constant here in order to avoid "baked" noinline
multiplier += 20;
JITDUMP("\nConstant argument feeds RuntimeHelpers.IsKnownConstant. Multiplier increased to %g.", multiplier);
}

if (m_ConstantArgFeedsConstantTest > 0)
{
multiplier += 3.0;
Expand Down Expand Up @@ -1629,6 +1620,15 @@ double ExtendedDefaultPolicy::DetermineMultiplier()
}
}

if (m_ConstArgFeedsIsKnownConst || (m_ArgFeedsIsKnownConst && m_IsPrejitRoot))
{
// if we use RuntimeHelpers.IsKnownConstant we most likely expect our function to be always inlined
// at least in the case of constant arguments. In IsPrejitRoot we don't have callsite info so let's
// assume we have a constant here in order to avoid "baked" noinline
multiplier += 20;
JITDUMP("\nConstant argument feeds RuntimeHelpers.IsKnownConstant. Multiplier increased to %g.", multiplier);
}

if (m_ArgFeedsConstantTest > 0)
{
multiplier += m_IsPrejitRoot ? 3.0 : 1.0;
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5224,6 +5224,12 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)

noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);

if (opts.OptimizationEnabled() && fgGlobalMorph)
{
// Fold possible constant expressions
asIndex->Index() = gtFoldExpr(asIndex->Index());
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
}

// Fold "cns_str"[cns_index] to ushort constant
// NOTE: don't do it for empty string, the operation will fail anyway
if (opts.OptimizationEnabled() && asIndex->Arr()->OperIs(GT_CNS_STR) &&
Expand Down
133 changes: 120 additions & 13 deletions src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text.Unicode;
using System.Buffers.Binary;
using System.Runtime.Intrinsics;

using Internal.Runtime.CompilerServices;

Expand Down Expand Up @@ -670,32 +672,137 @@ public bool Equals([NotNullWhen(true)] string? value, StringComparison compariso
}
}

/// <summary>
/// Reads eight bytes, with no alignment requirement, starting at the first character of
/// <paramref name="str"/> and returns them as one <see cref="ulong"/>.
/// </summary>
/// <param name="str">
/// The string to read from. Its first-character field is accessed directly, so it must not be null.
/// </param>
/// <returns>The raw 64-bit value located at the address of the string's first character.</returns>
/// <remarks>
/// No length check is performed here; eight bytes correspond to four chars, so the caller has to
/// make sure that many bytes are readable from the start of the string data.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static ulong ReadUInt64(string str)
{
    // View the first char as a byte reference so the unaligned read is expressed in bytes.
    return Unsafe.ReadUnaligned<ulong>(
        ref Unsafe.As<char, byte>(ref str._firstChar));
}

/// <summary>
/// Reads eight bytes, with no alignment requirement, starting <paramref name="offset"/> chars away
/// from the first character of <paramref name="str"/> and returns them as one <see cref="ulong"/>.
/// </summary>
/// <param name="str">
/// The string to read from. Its first-character field is accessed directly, so it must not be null.
/// </param>
/// <param name="offset">
/// Distance from the first character, counted in chars (not bytes). The value is applied as-is and
/// is not range-checked, so it may also be negative.
/// </param>
/// <returns>The raw 64-bit value located at the computed address.</returns>
/// <remarks>
/// No bounds check is performed here; the caller has to make sure the eight bytes at the
/// resulting address are readable.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static ulong ReadUInt64(string str, nint offset)
{
    // Step 'offset' chars from the first char, then reinterpret that location as bytes for the read.
    return Unsafe.ReadUnaligned<ulong>(
        ref Unsafe.As<char, byte>(
            ref Unsafe.Add(ref str._firstChar, offset)));
}

// Determines whether two Strings match.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool Equals(string? a, string? b)
{
// Transform 'str == ""' to 'str != null && str.Length == 0' if either a or b are jit-time
// constants. Otherwise, these two blocks are eliminated
if (RuntimeHelpers.IsKnownConstant(a) && a != null && a.Length == 0)
#if TARGET_64BIT && !MONO && !BIGENDIAN
// Try to unroll Equals in case of constant 'b' for b.Length in [0..16] range
if (RuntimeHelpers.IsKnownConstant(b) && !RuntimeHelpers.IsKnownConstant(a) && b != null)
{
return b != null && b.Length == 0;
// Unroll using SWAR
if (b.Length <= 7)
{
return EqualsUnrolled_0_to_7(a, b);
}
// Unroll using two Vector128s
else if (b.Length <= 16 && Vector128.IsHardwareAccelerated)
{
return EqualsUnrolled_9_to_16(a, b);
}
// NOTE: for some values we can emit a more optimal codegen e.g. for Length 7 and 8
// we can use a single Vector128 or add Vector256 path for Length in [16..32] range
// but we need to be careful here with inliner's budget
}

if (RuntimeHelpers.IsKnownConstant(b) && b != null && b.Length == 0)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static bool EqualsUnrolled_0_to_7(string? a, string b)
{
return a != null && a.Length == 0;
}
if (a != null)
{
if (b.Length == 0)
{
// Fold Equals(a, "") to 'a != null && a.Length == 0'
return a.Length == 0;
}

if (object.ReferenceEquals(a, b))
{
return true;
// For Length 1 and 2 we rely on the fact that even an empty string is 8 bytes long (on 64-bit)
// [ 4b Length ][ 2b _firstChar ][ 2b padding ]
// so we can check Length and first 2 chars in a single operation
if (b.Length == 1)
{
return ReadUInt64(a, -2) == (((ulong)b[0] << 32) | 1UL);
MihaZupan marked this conversation as resolved.
Show resolved Hide resolved
}
if (b.Length == 2)
{
return ReadUInt64(a, -2) == (((ulong)b[1] << 48) |
((ulong)b[0] << 32) | 2UL);
}

if (b.Length == 3)
{
// Load ch1, ch2, ch3 and \0 into ulong
return a.Length == 3 &&
ReadUInt64(a) == (((ulong)b[2] << 32) |
((ulong)b[1] << 16) | b[0]);
}

ulong v1 = ReadUInt64(a);
ulong cns1 = ((ulong)b[3] << 48) |
((ulong)b[2] << 32) |
((ulong)b[1] << 16) | b[0];

if (b.Length == 4)
{
// Load ch1, ch2, ch3 and ch4 into ulong
return a.Length == 4 && v1 == cns1;
}

// Handle Length [5..7] via two ulong (overlapped)
return a.Length == b.Length && v1 == cns1 &&
ReadUInt64(a, b.Length - 4) == (((ulong)b[b.Length - 1] << 48) |
((ulong)b[b.Length - 2] << 32) |
((ulong)b[b.Length - 3] << 16) |
((ulong)b[b.Length - 4] << 0));
}

// a is null when b is a known non-null
return false;
}

if (a is null || b is null || a.Length != b.Length)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static bool EqualsUnrolled_9_to_16(string? a, string b)
{
if (a != null && a.Length == b.Length)
{
// Load 'a' into two vectors with overlapping.
Vector128<ushort> v2 = Vector128.LoadUnsafe(
ref Unsafe.As<char, ushort>(ref a._firstChar), (nuint)b.Length - 8);
Vector128<ushort> v1 = Vector128.LoadUnsafe(
ref Unsafe.As<char, ushort>(ref a._firstChar));

// ((v1 ^ cns1) | (v2 ^ cns2)) == zero
return ((v1 ^ Vector128.Create(
b[0], b[1], b[2], b[3],
b[4], b[5], b[6], b[7])) |
(v2 ^ Vector128.Create(
// b[b.Length - c] are folded to constants
b[b.Length - 8], b[b.Length - 7],
b[b.Length - 6], b[b.Length - 5],
b[b.Length - 4], b[b.Length - 3],
b[b.Length - 2], b[b.Length - 1]))) == Vector128<ushort>.Zero;
}
// a is null when b is a known non-null
return false;
}

return EqualsHelper(a, b);
// A two-Vector256 implementation could be essentially a copy of EqualsUnrolled_9_to_16 with wider
// vectors to handle inputs [17..32], but we need to tune the inliner's budget first
#endif
return EqualsFallback(a, b);

static bool EqualsFallback(string? a, string? b)
{
if (object.ReferenceEquals(a, b))
{
return true;
}

if (a is null || b is null || a.Length != b.Length)
{
return false;
}

return EqualsHelper(a, b);
}
}

public static bool Equals(string? a, string? b, StringComparison comparisonType)
Expand Down Expand Up @@ -939,7 +1046,7 @@ static int GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(string str)
//
public bool StartsWith(string value!!)
{
return StartsWith(value, StringComparison.CurrentCulture);
return CultureInfo.CurrentCulture.CompareInfo.IsPrefix(this, value, GetCaseCompareOfComparisonCulture(StringComparison.CurrentCulture));
}

public bool StartsWith(string value!!, StringComparison comparisonType)
Expand Down
Loading