Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Fix small->uint->float casts with AVX-512 #112892

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 1 addition & 10 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7287,7 +7287,7 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
#endif

var_types dstType = treeNode->CastToType();
var_types srcType = op1->TypeGet();
var_types srcType = genActualType(op1->TypeGet());
assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));

// Since xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness we
Expand All @@ -7302,15 +7302,6 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
srcType = TYP_I_IMPL;
}

// At this point, we should not see a srcType that is not int or long.
// For conversions from byte/sbyte/int16/uint16 to float/double, we would expect
// either the front-end or lowering phase to have generated two levels of cast.
// The first one is for widening smaller int type to int32 and the second one is
// to the float/double.
// On 32-bit, we expect morph to replace long to float/double casts with helper calls,
// so we should only see int here.
noway_assert(varTypeIsIntOrI(srcType));

// To convert integral type to floating, the cvt[u]si2ss/sd instruction is used
// which does a partial write to lower 4/8 bytes of xmm register keeping the other
// upper bytes unmodified. If "cvt[u]si2ss/sd xmmReg, r32/r64" occurs inside a loop,
Expand Down
50 changes: 8 additions & 42 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,6 @@ GenTree* Lowering::LowerCast(GenTree* tree)
var_types castToType = tree->CastToType();
var_types dstType = castToType;
var_types srcType = castOp->TypeGet();
var_types tmpType = TYP_UNDEF;

// force the srcType to unsigned if GT_UNSIGNED flag is set
if (tree->IsUnsigned())
Expand All @@ -837,11 +836,14 @@ GenTree* Lowering::LowerCast(GenTree* tree)
// Reason: must be converted to a helper call
// srcType = float/double, castToType = ulong
// Reason: must be converted to a helper call
// srcType = float/double, castToType = byte/sbyte/ushort/short
// Reason: must have intermediate cast to int
// srcType = uint castToType = float/double
// Reason: uint -> float/double = uint -> long -> float/double
if (varTypeIsFloating(srcType))
{
noway_assert(!tree->gtOverflow());
assert(!varTypeIsSmall(dstType));
assert(castToType != TYP_ULONG || comp->canUseEvexEncodingDebugOnly());
}
else if (srcType == TYP_UINT)
Expand Down Expand Up @@ -1164,30 +1166,6 @@ GenTree* Lowering::LowerCast(GenTree* tree)
}
#endif // TARGET_AMD64

// Case of src is a small type and dst is a floating point type.
if (varTypeIsSmall(srcType) && varTypeIsFloating(castToType))
{
// These conversions can never be overflow detecting ones.
noway_assert(!tree->gtOverflow());
tmpType = TYP_INT;
}
// case of src is a floating point type and dst is a small type.
else if (varTypeIsFloating(srcType) && varTypeIsSmall(castToType))
{
tmpType = TYP_INT;
}

if (tmpType != TYP_UNDEF)
{
GenTree* tmp = comp->gtNewCastNode(tmpType, castOp, tree->IsUnsigned(), tmpType);
tmp->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));

tree->gtFlags &= ~GTF_UNSIGNED;
tree->AsOp()->gtOp1 = tmp;
BlockRange().InsertAfter(castOp, tmp);
ContainCheckCast(tmp->AsCast());
}

// Now determine if we have operands that should be contained.
ContainCheckCast(tree->AsCast());
return nullptr;
Expand Down Expand Up @@ -8567,26 +8545,14 @@ void Lowering::ContainCheckCast(GenTreeCast* node)

if (varTypeIsFloating(castToType) || varTypeIsFloating(srcType))
{
#ifdef DEBUG
// If converting to float/double, the operand must be 4 or 8 byte in size.
if (varTypeIsFloating(castToType))
if (castOp->IsCnsNonZeroFltOrDbl())
{
unsigned opSize = genTypeSize(srcType);
assert(opSize == 4 || opSize == 8);
MakeSrcContained(node, castOp);
}
#endif // DEBUG

// U8 -> R8 conversion requires that the operand be in a register.
if (srcType != TYP_ULONG)
else
{
if (castOp->IsCnsNonZeroFltOrDbl())
{
MakeSrcContained(node, castOp);
}
else
{
srcIsContainable = true;
}
// The ulong->floating SSE2 fallback requires the source to be in register
srcIsContainable = !varTypeIsSmall(srcType) && ((srcType != TYP_ULONG) || comp->canUseEvexEncoding());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this varTypeIsSmall check? It is almost certainly wrong.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess it is saying that containing the non-extending loads in casts is ok, which is true. It just reads oddly since that depends on TryMakeSrcContainedOrRegOptional below making that additional check for "is load". I would probably replace this with castOp->isMemoryOp() && !varTypeIsSmall(castOp) && ... just to make it perfectly clear what is being checked here.

}
}
else if (comp->opts.OptimizationEnabled() && varTypeIsIntegral(castOp) && varTypeIsIntegral(castToType))
Expand Down
23 changes: 23 additions & 0 deletions src/tests/JIT/Regression/JitBlue/Runtime_112871/Runtime_112871.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

// Generated by Fuzzlyn v2.4 on 2025-02-23 17:30:17
// Run on X64 Windows
// Seed: 18098778274409643257-vectort,vector128,vector256,x86aes,x86avx,x86avx2,x86avx512bw,x86avx512bwvl,x86avx512cd,x86avx512cdvl,x86avx512dq,x86avx512dqvl,x86avx512f,x86avx512fvl,x86avx512fx64,x86bmi1,x86bmi1x64,x86bmi2,x86bmi2x64,x86fma,x86lzcnt,x86lzcntx64,x86pclmulqdq,x86popcnt,x86popcntx64,x86sse,x86ssex64,x86sse2,x86sse2x64,x86sse3,x86sse41,x86sse41x64,x86sse42,x86sse42x64,x86ssse3,x86x86base
// Reduced from 23.4 KiB to 0.4 KiB in 00:01:16

using System;
using Xunit;

public class Runtime_112871
{
public static short s_1;

[Fact]
public static void Problem()
{
ushort vr0 = default(ushort);
s_1 = -1;
Assert.True((double)0 <= (uint)s_1);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<Optimize>False</Optimize>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>
Loading