Skip to content

Commit

Permalink
Enable StructEnreg by default on all platforms. (#55558)
Browse files Browse the repository at this point in the history
* enable for arm32.

fix arm32

Fix arm/arm64.

Contained lclRead nodes are now possible on other platforms, not only xarch.

* enable x64 unix.

* Fix and enable arm64.

* fix bad merge and arm32 failures.
  • Loading branch information
Sergey Andreenko authored Jul 15, 2021
1 parent 0663b30 commit 90b8ddd
Show file tree
Hide file tree
Showing 13 changed files with 166 additions and 151 deletions.
2 changes: 0 additions & 2 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -866,10 +866,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// Generate code for a GT_BITCAST that is not contained.
void genCodeForBitCast(GenTreeOp* treeNode);

#if defined(TARGET_XARCH)
// Generate the instruction to move a value between register files
void genBitCast(var_types targetType, regNumber targetReg, var_types srcType, regNumber srcReg);
#endif // TARGET_XARCH

struct GenIntCastDesc
{
Expand Down
16 changes: 11 additions & 5 deletions src/coreclr/jit/codegenarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1044,15 +1044,21 @@ void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree)
//
void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree)
{
GenTree* data = tree->gtOp1;

GenTree* data = tree->gtOp1;
GenTree* actualData = data->gtSkipReloadOrCopy();
unsigned regCount = 1;
// var = call, where call returns a multi-reg return value
// case is handled separately.
if (data->gtSkipReloadOrCopy()->IsMultiRegNode())
if (actualData->IsMultiRegNode())
{
genMultiRegStoreToLocal(tree);
regCount = actualData->IsMultiRegLclVar() ? actualData->AsLclVar()->GetFieldCount(compiler)
: actualData->GetMultiRegCount();
if (regCount > 1)
{
genMultiRegStoreToLocal(tree);
}
}
else
if (regCount == 1)
{
unsigned varNum = tree->GetLclNum();
assert(varNum < compiler->lvaCount);
Expand Down
21 changes: 12 additions & 9 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4378,9 +4378,11 @@ void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode)
{
assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperSave);

GenTree* op1 = simdNode->gtGetOp1();
assert(op1->IsLocal());
assert(emitTypeSize(op1->TypeGet()) == 16);
GenTree* op1 = simdNode->gtGetOp1();
GenTreeLclVar* lclNode = op1->AsLclVar();
LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode);
assert(emitTypeSize(varDsc->GetRegisterType(lclNode)) == 16);

regNumber targetReg = simdNode->GetRegNum();
regNumber op1Reg = genConsumeReg(op1);
assert(op1Reg != REG_NA);
Expand All @@ -4391,8 +4393,7 @@ void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode)
{
// This is not a normal spill; we'll spill it to the lclVar location.
// The localVar must have a stack home.
unsigned varNum = op1->AsLclVarCommon()->GetLclNum();
LclVarDsc* varDsc = compiler->lvaGetDesc(varNum);
unsigned varNum = lclNode->GetLclNum();
assert(varDsc->lvOnFrame);
// We want to store this to the upper 8 bytes of this localVar's home.
int offset = 8;
Expand Down Expand Up @@ -4429,16 +4430,18 @@ void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode)

GenTree* op1 = simdNode->gtGetOp1();
assert(op1->IsLocal());
assert(emitTypeSize(op1->TypeGet()) == 16);
GenTreeLclVar* lclNode = op1->AsLclVar();
LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode);
assert(emitTypeSize(varDsc->GetRegisterType(lclNode)) == 16);

regNumber srcReg = simdNode->GetRegNum();
regNumber lclVarReg = genConsumeReg(op1);
unsigned varNum = op1->AsLclVarCommon()->GetLclNum();
regNumber lclVarReg = genConsumeReg(lclNode);
unsigned varNum = lclNode->GetLclNum();
assert(lclVarReg != REG_NA);
assert(srcReg != REG_NA);
if (simdNode->gtFlags & GTF_SPILLED)
{
// The localVar must have a stack home.
LclVarDsc* varDsc = compiler->lvaGetDesc(varNum);
assert(varDsc->lvOnFrame);
// We will load this from the upper 8 bytes of this localVar's home.
int offset = 8;
Expand Down
52 changes: 0 additions & 52 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1125,58 +1125,6 @@ void CodeGen::genPutArgReg(GenTreeOp* tree)
genProduceReg(tree);
}

//----------------------------------------------------------------------
// genCodeForBitCast - Generate code for a GT_BITCAST that is not contained
//
// Arguments
// treeNode - the GT_BITCAST for which we're generating code
//
// Notes
// Three cases are distinguished:
//  - the source is contained: the source node is retyped to the bitcast's type,
//    given the bitcast's register, un-contained and generated in place;
//  - source and target disagree on float-register usage: a move is emitted
//    (on ARM an 8-byte value going to TYP_LONG uses vmov_d2i into two registers);
//  - otherwise: a non-skippable move of the target type.
//
void CodeGen::genCodeForBitCast(GenTreeOp* treeNode)
{
regNumber targetReg = treeNode->GetRegNum();
var_types targetType = treeNode->TypeGet();
GenTree* op1 = treeNode->gtGetOp1();
genConsumeRegs(op1);
if (op1->isContained())
{
// Only locals and indirections are expected as contained sources.
assert(op1->IsLocal() || op1->isIndir());
// Retarget the source at the bitcast's type and register, clear its contained
// state, and generate it as an ordinary node.
op1->gtType = treeNode->TypeGet();
op1->SetRegNum(targetReg);
op1->ClearContained();
JITDUMP("Changing type of BITCAST source to load directly.");
genCodeForTreeNode(op1);
}
else if (varTypeUsesFloatReg(treeNode) != varTypeUsesFloatReg(op1))
{
regNumber srcReg = op1->GetRegNum();
// The bitcast must not change the size of the value.
assert(genTypeSize(op1->TypeGet()) == genTypeSize(targetType));
#ifdef TARGET_ARM
if (genTypeSize(targetType) == 8)
{
// Converting between long and double on ARM is a special case.
if (targetType == TYP_LONG)
{
// The result is written to targetReg and to the node's gtOtherReg.
regNumber otherReg = treeNode->AsMultiRegOp()->gtOtherReg;
assert(otherReg != REG_NA);
inst_RV_RV_RV(INS_vmov_d2i, targetReg, otherReg, srcReg, EA_8BYTE);
}
else
{
NYI_ARM("Converting from long to double");
}
}
else
#endif // TARGET_ARM
{
inst_Mov(targetType, targetReg, srcReg, /* canSkip */ false);
}
}
else
{
// NOTE(review): op1 was already passed to genConsumeRegs above and is passed to
// genConsumeReg again here - confirm the second consume is intended.
inst_Mov(targetType, targetReg, genConsumeReg(op1), /* canSkip */ false);
}
// NOTE(review): genProduceReg(treeNode) is not called in this function; presumably
// the caller is responsible for it - confirm.
}

#if FEATURE_ARG_SPLIT
//---------------------------------------------------------------------
// genPutArgSplit - generate code for a GT_PUTARG_SPLIT node
Expand Down
85 changes: 75 additions & 10 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3783,7 +3783,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere

varNum = regArgTab[argNum].varNum;
noway_assert(varNum < compiler->lvaCount);
varDsc = compiler->lvaTable + varNum;
varDsc = compiler->lvaTable + varNum;
const var_types varRegType = varDsc->GetRegisterType();
noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);

/* cannot possibly have stack arguments */
Expand Down Expand Up @@ -3827,7 +3828,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
assert(argNum > 0);
assert(regArgTab[argNum - 1].slot == 1);
assert(regArgTab[argNum - 1].varNum == varNum);
assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
assert((varRegType == TYP_SIMD12) || (varRegType == TYP_SIMD16));
regArgMaskLive &= ~genRegMask(regNum);
regArgTab[argNum].circular = false;
change = true;
Expand Down Expand Up @@ -4338,9 +4339,10 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere

varNum = regArgTab[argNum].varNum;
noway_assert(varNum < compiler->lvaCount);
varDsc = compiler->lvaTable + varNum;
var_types regType = regArgTab[argNum].getRegType(compiler);
regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
varDsc = compiler->lvaTable + varNum;
const var_types regType = regArgTab[argNum].getRegType(compiler);
const regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
const var_types varRegType = varDsc->GetRegisterType();

#if defined(UNIX_AMD64_ABI)
if (regType == TYP_UNDEF)
Expand Down Expand Up @@ -4439,7 +4441,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
assert(regArgTab[argNum].slot == 2);
assert(argNum > 0);
assert(regArgTab[argNum - 1].slot == 1);
assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
assert((varRegType == TYP_SIMD12) || (varRegType == TYP_SIMD16));
destRegNum = varDsc->GetRegNum();
noway_assert(regNum != destRegNum);
continue;
Expand Down Expand Up @@ -4509,7 +4511,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
noway_assert(regArgTab[nextArgNum].varNum == varNum);
// Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg
// and moves the 0th element of the src reg into the 1st element of the dest reg.
GetEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varDsc->lvType), destRegNum, nextRegNum, 0);
GetEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varRegType), destRegNum, nextRegNum, 0);
// Set destRegNum to regNum so that we skip the setting of the register below,
// but mark argNum as processed and clear regNum from the live mask.
destRegNum = regNum;
Expand Down Expand Up @@ -11245,11 +11247,15 @@ void CodeGen::genStructReturn(GenTree* treeNode)
assert(regCount <= MAX_RET_REG_COUNT);

#if FEATURE_MULTIREG_RET
// Right now the only enregisterable structs supported are SIMD vector types.
if (genIsRegCandidateLocal(actualOp1))
{
// Right now the only enregisterable structs supported are SIMD vector types.
assert(varTypeIsSIMD(op1));
assert(!actualOp1->AsLclVar()->IsMultiReg());
#if defined(DEBUG)
const GenTreeLclVar* lclVar = actualOp1->AsLclVar();
const LclVarDsc* varDsc = compiler->lvaGetDesc(lclVar);
assert(varTypeIsSIMD(varDsc->GetRegisterType()));
assert(!lclVar->IsMultiReg());
#endif // DEBUG
#ifdef FEATURE_SIMD
genSIMDSplitReturn(op1, &retTypeDesc);
#endif // FEATURE_SIMD
Expand Down Expand Up @@ -11329,6 +11335,7 @@ void CodeGen::genMultiRegStoreToLocal(GenTreeLclVar* lclNode)
assert(op1->IsMultiRegNode());
unsigned regCount =
actualOp1->IsMultiRegLclVar() ? actualOp1->AsLclVar()->GetFieldCount(compiler) : actualOp1->GetMultiRegCount();
assert(regCount > 1);

// Assumption: current implementation requires that a multi-reg
// var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
Expand Down Expand Up @@ -12580,3 +12587,61 @@ void CodeGen::genPoisonFrame(regMaskTP regLiveIn)
}
}
}

//----------------------------------------------------------------------
// genBitCast - Generate the instruction to move a value between register files
//
// Arguments
// targetType - the destination type
// targetReg - the destination register
// srcType - the source type
// srcReg - the source register
//
// Notes
// Both registers are checked (in asserts) to belong to the register file implied
// by their types: a float register exactly when the type uses a float register or
// is a SIMD type. The move itself is emitted with the target type.
//
void CodeGen::genBitCast(var_types targetType, regNumber targetReg, var_types srcType, regNumber srcReg)
{
// The source register must be a float register iff the source type is float or SIMD.
const bool srcFltReg = varTypeUsesFloatReg(srcType) || varTypeIsSIMD(srcType);
assert(srcFltReg == genIsValidFloatReg(srcReg));

// Same check for the destination register and type.
const bool dstFltReg = varTypeUsesFloatReg(targetType) || varTypeIsSIMD(targetType);
assert(dstFltReg == genIsValidFloatReg(targetReg));

// NOTE(review): canSkip presumably lets inst_Mov omit the move when it would be a
// no-op (e.g. source and target are the same register) - confirm against inst_Mov.
inst_Mov(targetType, targetReg, srcReg, /* canSkip */ true);
}

//----------------------------------------------------------------------
// genCodeForBitCast - Generate code for a GT_BITCAST that is not contained
//
// Arguments
// treeNode - the GT_BITCAST for which we're generating code
//
// Notes
// The source operand is handled in one of three ways:
//  - contained register-candidate local: loaded from the local's stack slot
//    directly into the target register, using the bitcast's type;
//  - any other contained source (local or indirection): retyped to the bitcast's
//    type, given the target register, un-contained and generated in place;
//  - source in a register: moved via genBitCast.
// In all cases the result register is produced with genProduceReg.
//
void CodeGen::genCodeForBitCast(GenTreeOp* treeNode)
{
regNumber targetReg = treeNode->GetRegNum();
var_types targetType = treeNode->TypeGet();
GenTree* op1 = treeNode->gtGetOp1();
genConsumeRegs(op1);

if (op1->isContained())
{
// Only locals and indirections are expected as contained sources.
assert(op1->IsLocal() || op1->isIndir());
if (genIsRegCandidateLocal(op1))
{
// Load the local from its stack location with a load chosen for the bitcast's
// type, rather than retyping the local node itself.
// NOTE(review): the trailing 0 is presumably the offset within the local - confirm.
unsigned lclNum = op1->AsLclVar()->GetLclNum();
GetEmitter()->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lclNum)),
emitTypeSize(treeNode), targetReg, lclNum, 0);
}
else
{
// Retarget the source at the bitcast's type and register, clear its contained
// state, and generate it as an ordinary node.
op1->gtType = treeNode->TypeGet();
op1->SetRegNum(targetReg);
op1->ClearContained();
JITDUMP("Changing type of BITCAST source to load directly.\n");
genCodeForTreeNode(op1);
}
}
else
{
// Source already lives in a register: emit a move into the target register.
genBitCast(targetType, targetReg, op1->TypeGet(), op1->GetRegNum());
}
genProduceReg(treeNode);
}
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1577,7 +1577,6 @@ void CodeGen::genConsumeRegs(GenTree* tree)
{
genConsumeAddress(tree);
}
#ifdef TARGET_XARCH
else if (tree->OperIsLocalRead())
{
// A contained lcl var must be living on stack and marked as reg optional, or not be a
Expand All @@ -1591,6 +1590,7 @@ void CodeGen::genConsumeRegs(GenTree* tree)
// Update the life of the lcl var.
genUpdateLife(tree);
}
#ifdef TARGET_XARCH
#ifdef FEATURE_HW_INTRINSICS
else if (tree->OperIs(GT_HWINTRINSIC))
{
Expand Down
58 changes: 0 additions & 58 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7073,64 +7073,6 @@ void CodeGen::genIntrinsic(GenTree* treeNode)
genProduceReg(treeNode);
}

//----------------------------------------------------------------------
// genBitCast - Generate the instruction to move a value between register files
//
// Arguments
// targetType - the destination type
// targetReg - the destination register
// srcType - the source type
// srcReg - the source register
//
// Notes
// Both registers are checked (in asserts) to belong to the register file implied
// by their types: a float register exactly when the type uses a float register or
// is a SIMD type. The move itself is emitted with the target type.
//
void CodeGen::genBitCast(var_types targetType, regNumber targetReg, var_types srcType, regNumber srcReg)
{
// The source register must be a float register iff the source type is float or SIMD.
const bool srcFltReg = varTypeUsesFloatReg(srcType) || varTypeIsSIMD(srcType);
assert(srcFltReg == genIsValidFloatReg(srcReg));

// Same check for the destination register and type.
const bool dstFltReg = varTypeUsesFloatReg(targetType) || varTypeIsSIMD(targetType);
assert(dstFltReg == genIsValidFloatReg(targetReg));

// NOTE(review): canSkip presumably lets inst_Mov omit the move when it would be a
// no-op (e.g. source and target are the same register) - confirm against inst_Mov.
inst_Mov(targetType, targetReg, srcReg, /* canSkip */ true);
}

//----------------------------------------------------------------------
// genCodeForBitCast - Generate code for a GT_BITCAST that is not contained
//
// Arguments
// treeNode - the GT_BITCAST for which we're generating code
//
// Notes
// The source operand is handled in one of three ways:
//  - contained register-candidate local: loaded from the local's stack slot
//    directly into the target register, using the bitcast's type;
//  - any other contained source (local or indirection): retyped to the bitcast's
//    type, given the target register, un-contained and generated in place;
//  - source in a register: moved via genBitCast.
// In all cases the result register is produced with genProduceReg.
//
void CodeGen::genCodeForBitCast(GenTreeOp* treeNode)
{
regNumber targetReg = treeNode->GetRegNum();
var_types targetType = treeNode->TypeGet();
GenTree* op1 = treeNode->gtGetOp1();
genConsumeRegs(op1);

if (op1->isContained())
{
// Only locals and indirections are expected as contained sources.
assert(op1->IsLocal() || op1->isIndir());
if (genIsRegCandidateLocal(op1))
{
// Load the local from its stack location with a load chosen for the bitcast's
// type, rather than retyping the local node itself.
// NOTE(review): the trailing 0 is presumably the offset within the local - confirm.
unsigned lclNum = op1->AsLclVar()->GetLclNum();
GetEmitter()->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lclNum)),
emitTypeSize(treeNode), targetReg, lclNum, 0);
}
else
{
// Retarget the source at the bitcast's type and register, clear its contained
// state, and generate it as an ordinary node.
op1->gtType = treeNode->TypeGet();
op1->SetRegNum(targetReg);
op1->ClearContained();
JITDUMP("Changing type of BITCAST source to load directly.");
genCodeForTreeNode(op1);
}
}
else
{
// Source already lives in a register: emit a move into the target register.
genBitCast(targetType, targetReg, op1->TypeGet(), op1->GetRegNum());
}
genProduceReg(treeNode);
}

//-------------------------------------------------------------------------- //
// getBaseVarForPutArgStk - returns the baseVarNum for passing a stack arg.
//
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -7630,11 +7630,13 @@ class Compiler
#if defined(TARGET_AMD64)
// Returns true only for TYP_SIMD32 on this target.
// TYP_STRUCT is rejected by the assert.
// NOTE(review): presumably callers must pass the local's register type
// rather than its declared struct type - confirm at call sites.
static bool varTypeNeedsPartialCalleeSave(var_types type)
{
assert(type != TYP_STRUCT);
return (type == TYP_SIMD32);
}
#elif defined(TARGET_ARM64)
static bool varTypeNeedsPartialCalleeSave(var_types type)
{
assert(type != TYP_STRUCT);
// ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes
// For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes.
return ((type == TYP_SIMD16) || (type == TYP_SIMD12));
Expand Down
5 changes: 0 additions & 5 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -556,12 +556,7 @@ CONFIG_INTEGER(JitSaveFpLrWithCalleeSavedRegisters, W("JitSaveFpLrWithCalleeSave
#endif // defined(TARGET_ARM64)
#endif // DEBUG

#if defined(TARGET_WINDOWS) && defined(TARGET_XARCH)
CONFIG_INTEGER(JitEnregStructLocals, W("JitEnregStructLocals"), 1) // Allow to enregister locals with struct type.
#else
CONFIG_INTEGER(JitEnregStructLocals, W("JitEnregStructLocals"), 0) // Don't allow to enregister locals with struct type
// yet.
#endif

#undef CONFIG_INTEGER
#undef CONFIG_STRING
Expand Down
Loading

0 comments on commit 90b8ddd

Please sign in to comment.