From 2f5f1d51dcde05ad67503c9dc56b8cb563467714 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 16 May 2021 21:51:12 -0700 Subject: [PATCH] Refactor codegen and emit to centralize `mov reg, reg` handling (#52661) * Refactor codegen and emit to centralize `mov reg, reg` handling * Responding to PR feedback --- src/coreclr/jit/codegen.h | 15 + src/coreclr/jit/codegenarm.cpp | 62 ++- src/coreclr/jit/codegenarm64.cpp | 140 +++---- src/coreclr/jit/codegenarmarch.cpp | 88 ++-- src/coreclr/jit/codegencommon.cpp | 76 ++-- src/coreclr/jit/codegenlinear.cpp | 47 +-- src/coreclr/jit/codegenxarch.cpp | 334 ++++++--------- src/coreclr/jit/emitarm.cpp | 286 +++++++++---- src/coreclr/jit/emitarm.h | 8 + src/coreclr/jit/emitarm64.cpp | 283 ++++++++----- src/coreclr/jit/emitarm64.h | 6 +- src/coreclr/jit/emitxarch.cpp | 431 ++++++++++++-------- src/coreclr/jit/emitxarch.h | 3 + src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 72 +--- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 62 ++- src/coreclr/jit/instr.cpp | 54 ++- src/coreclr/jit/simdcodegenxarch.cpp | 148 +++---- 17 files changed, 1141 insertions(+), 974 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 25597bdf6cd5f..99cc72d8b128b 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1369,6 +1369,21 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void inst_RV(instruction ins, regNumber reg, var_types type, emitAttr size = EA_UNKNOWN); + void inst_Mov(var_types dstType, + regNumber dstReg, + regNumber srcReg, + bool canSkip, + emitAttr size = EA_UNKNOWN, + insFlags flags = INS_FLAGS_DONT_CARE); + + void inst_Mov_Extend(var_types srcType, + bool srcInReg, + regNumber dstReg, + regNumber srcReg, + bool canSkip, + emitAttr size = EA_UNKNOWN, + insFlags flags = INS_FLAGS_DONT_CARE); + void inst_RV_RV(instruction ins, regNumber reg1, regNumber reg2, diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 
4c352ff9f648a..2341ebeba001a 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -203,7 +203,7 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, if (GetEmitter()->isLowRegister(reg) && (imm_hi16 == 0xffff) && ((imm_lo16 & 0x8000) == 0x8000)) { - GetEmitter()->emitIns_R_R(INS_sxth, EA_4BYTE, reg, reg); + GetEmitter()->emitIns_Mov(INS_sxth, EA_4BYTE, reg, reg, /* canSkip */ false); } else { @@ -211,7 +211,7 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, } if (flags == INS_FLAGS_SET) - GetEmitter()->emitIns_R_R(INS_mov, size, reg, reg, INS_FLAGS_SET); + GetEmitter()->emitIns_Mov(INS_mov, size, reg, reg, /* canSkip */ false, INS_FLAGS_SET); } } @@ -260,7 +260,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre float f = forceCastToFloat(constValue); genSetRegToIcon(tmpReg, *((int*)(&f))); - GetEmitter()->emitIns_R_R(INS_vmov_i2f, EA_4BYTE, targetReg, tmpReg); + GetEmitter()->emitIns_Mov(INS_vmov_i2f, EA_4BYTE, targetReg, tmpReg, /* canSkip */ false); } else { @@ -550,7 +550,7 @@ void CodeGen::genLclHeap(GenTree* tree) inst_JMP(EJ_lo, done); // Update SP to be at the next page of stack that we will tickle - GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regTmp); + GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_SPBASE, regTmp, /* canSkip */ false); // Jump to loop and tickle new stack address inst_JMP(EJ_jmp, loop); @@ -559,7 +559,7 @@ void CodeGen::genLclHeap(GenTree* tree) genDefineTempLabel(done); // Now just move the final value to SP - GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt); + GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt, /* canSkip */ false); // lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space, // we're going to assume the worst and probe. 
@@ -591,7 +591,7 @@ void CodeGen::genLclHeap(GenTree* tree) else // stackAdjustment == 0 { // Move the final value of SP to regCnt - inst_RV_RV(INS_mov, regCnt, REG_SPBASE); + inst_Mov(TYP_I_IMPL, regCnt, REG_SPBASE, /* canSkip */ false); } BAILOUT: @@ -926,10 +926,7 @@ void CodeGen::genCodeForShiftLong(GenTree* tree) regNumber regResult = (oper == GT_LSH_HI) ? regHi : regLo; - if (regResult != tree->GetRegNum()) - { - inst_RV_RV(INS_mov, tree->GetRegNum(), regResult, targetType); - } + inst_Mov(targetType, tree->GetRegNum(), regResult, /* canSkip */ true); if (oper == GT_LSH_HI) { @@ -1015,7 +1012,7 @@ void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) if (targetType == TYP_FLOAT) { regNumber floatAsInt = tree->GetSingleTempReg(); - emit->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, floatAsInt, dataReg); + emit->emitIns_Mov(INS_vmov_f2i, EA_4BYTE, floatAsInt, dataReg, /* canSkip */ false); emit->emitIns_R_R(INS_str, EA_4BYTE, floatAsInt, addr); } else @@ -1093,11 +1090,9 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree) } else // store into register (i.e move into register) { - if (dataReg != targetReg) - { - // Assign into targetReg when dataReg (from op1) is not the same register - inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType); - } + // Assign into targetReg when dataReg (from op1) is not the same register + inst_Mov(targetType, targetReg, dataReg, /* canSkip */ true); + genProduceReg(tree); } } @@ -1185,13 +1180,13 @@ void CodeGen::genCkfinite(GenTree* treeNode) // Extract and sign-extend the exponent into an integer register if (targetType == TYP_FLOAT) { - emit->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, intReg, fpReg); + emit->emitIns_Mov(INS_vmov_f2i, EA_4BYTE, intReg, fpReg, /* canSkip */ false); emit->emitIns_R_R_I_I(INS_sbfx, EA_4BYTE, intReg, intReg, 23, 8); } else { assert(targetType == TYP_DOUBLE); - emit->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, intReg, REG_NEXT(fpReg)); + emit->emitIns_Mov(INS_vmov_f2i, EA_4BYTE, intReg, 
REG_NEXT(fpReg), /* canSkip */ false); emit->emitIns_R_R_I_I(INS_sbfx, EA_4BYTE, intReg, intReg, 20, 11); } @@ -1200,10 +1195,8 @@ void CodeGen::genCkfinite(GenTree* treeNode) genJumpToThrowHlpBlk(EJ_eq, SCK_ARITH_EXCPN); // If it's a finite value, copy it to targetReg - if (targetReg != fpReg) - { - emit->emitIns_R_R(ins_Copy(targetType), emitTypeSize(treeNode), targetReg, fpReg); - } + inst_Mov(targetType, targetReg, fpReg, /* canSkip */ true, emitTypeSize(treeNode)); + genProduceReg(treeNode); } @@ -1313,16 +1306,10 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) noway_assert(data->GetRegNum() != REG_ARG_0); // addr goes in REG_ARG_0 - if (addr->GetRegNum() != REG_ARG_0) - { - inst_RV_RV(INS_mov, REG_ARG_0, addr->GetRegNum(), addr->TypeGet()); - } + inst_Mov(addr->TypeGet(), REG_ARG_0, addr->GetRegNum(), /* canSkip */ true); // data goes in REG_ARG_1 - if (data->GetRegNum() != REG_ARG_1) - { - inst_RV_RV(INS_mov, REG_ARG_1, data->GetRegNum(), data->TypeGet()); - } + inst_Mov(data->TypeGet(), REG_ARG_1, data->GetRegNum(), /* canSkip */ true); genGCWriteBarrier(tree, writeBarrierForm); } @@ -1425,10 +1412,7 @@ void CodeGen::genLongToIntCast(GenTree* cast) } } - if (dstReg != loSrcReg) - { - inst_RV_RV(INS_mov, dstReg, loSrcReg, TYP_INT, EA_4BYTE); - } + inst_Mov(TYP_INT, dstReg, loSrcReg, /* canSkip */ true); genProduceReg(cast); } @@ -1491,7 +1475,7 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) genConsumeOperands(treeNode->AsOp()); assert(insVcvt != INS_invalid); - GetEmitter()->emitIns_R_R(INS_vmov_i2f, srcSize, treeNode->GetRegNum(), op1->GetRegNum()); + GetEmitter()->emitIns_Mov(INS_vmov_i2f, srcSize, treeNode->GetRegNum(), op1->GetRegNum(), /* canSkip */ false); GetEmitter()->emitIns_R_R(insVcvt, srcSize, treeNode->GetRegNum(), treeNode->GetRegNum()); genProduceReg(treeNode); @@ -1556,7 +1540,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) assert(insVcvt != INS_invalid); GetEmitter()->emitIns_R_R(insVcvt, dstSize, tmpReg, 
op1->GetRegNum()); - GetEmitter()->emitIns_R_R(INS_vmov_f2i, dstSize, treeNode->GetRegNum(), tmpReg); + GetEmitter()->emitIns_Mov(INS_vmov_f2i, dstSize, treeNode->GetRegNum(), tmpReg, /* canSkip */ false); genProduceReg(treeNode); } @@ -1778,7 +1762,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) { // Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing // profiler handle. Therefore, r0 is moved to REG_PROFILER_RETURN_SCRATCH as per contract. - GetEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_R0); + GetEmitter()->emitIns_Mov(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_R0, /* canSkip */ false); genTransferRegGCState(REG_PROFILER_RET_SCRATCH, REG_R0); regSet.verifyRegUsed(REG_PROFILER_RET_SCRATCH); } @@ -1802,7 +1786,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) // Restore state that existed before profiler callback if (r0InUse) { - GetEmitter()->emitIns_R_R(INS_mov, attr, REG_R0, REG_PROFILER_RET_SCRATCH); + GetEmitter()->emitIns_Mov(INS_mov, attr, REG_R0, REG_PROFILER_RET_SCRATCH, /* canSkip */ false); genTransferRegGCState(REG_R0, REG_PROFILER_RET_SCRATCH); gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH); } @@ -1870,7 +1854,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG); genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN, REG_STACK_PROBE_HELPER_CALL_TARGET); compiler->unwindPadding(); - GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG); + GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG, /* canSkip */ false); if ((genRegMask(initReg) & (RBM_STACK_PROBE_HELPER_ARG | RBM_STACK_PROBE_HELPER_CALL_TARGET | RBM_STACK_PROBE_HELPER_TRASH)) != RBM_NONE) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index d20e538a397c3..14213cbcdc9a1 100644 --- 
a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -1530,7 +1530,7 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block) } else { - GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_R0, REG_SPBASE); + GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_R0, REG_SPBASE, /* canSkip */ false); } GetEmitter()->emitIns_J(INS_bl_local, block->bbJumpDest); @@ -2044,11 +2044,9 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) } else // store into register (i.e move into register) { - if (dataReg != targetReg) - { - // Assign into targetReg when dataReg (from op1) is not the same register - inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType); - } + // Assign into targetReg when dataReg (from op1) is not the same register + inst_Mov(targetType, targetReg, dataReg, /* canSkip */ true); + genProduceReg(lclNode); } } @@ -2100,11 +2098,8 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) } } } - if (movRequired) - { - emitAttr attr = emitActualTypeSize(targetType); - GetEmitter()->emitIns_R_R(INS_mov, attr, retReg, op1->GetRegNum()); - } + emitAttr attr = emitActualTypeSize(targetType); + GetEmitter()->emitIns_Mov(INS_mov, attr, retReg, op1->GetRegNum(), /* canSkip */ !movRequired); } /*********************************************************************************************** @@ -2170,10 +2165,7 @@ void CodeGen::genLclHeap(GenTree* tree) else { regCnt = tree->ExtractTempReg(); - if (regCnt != targetReg) - { - inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet()); - } + inst_Mov(size->TypeGet(), regCnt, targetReg, /* canSkip */ true); } // Align to STACK_ALIGN @@ -2341,7 +2333,7 @@ void CodeGen::genLclHeap(GenTree* tree) inst_JMP(EJ_lo, done); // Update SP to be at the next page of stack that we will tickle - GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regTmp); + GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_SPBASE, regTmp, /* canSkip */ false); // Jump to loop and tickle new stack address 
inst_JMP(EJ_jmp, loop); @@ -2350,7 +2342,7 @@ void CodeGen::genLclHeap(GenTree* tree) genDefineTempLabel(done); // Now just move the final value to SP - GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt); + GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt, /* canSkip */ false); // lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space, // we're going to assume the worst and probe. @@ -2384,7 +2376,7 @@ void CodeGen::genLclHeap(GenTree* tree) else // stackAdjustment == 0 { // Move the final value of SP to targetReg - inst_RV_RV(INS_mov, targetReg, REG_SPBASE); + inst_Mov(TYP_I_IMPL, targetReg, REG_SPBASE, /* canSkip */ false); } BAILOUT: @@ -2956,14 +2948,12 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) emitAttr dataSize = emitActualTypeSize(data); // casal use the comparand as the target reg - if (targetReg != comparandReg) - { - GetEmitter()->emitIns_R_R(INS_mov, dataSize, targetReg, comparandReg); + GetEmitter()->emitIns_Mov(INS_mov, dataSize, targetReg, comparandReg, /* canSkip */ true); + + // Catch case we destroyed data or address before use + noway_assert((addrReg != targetReg) || (targetReg == comparandReg)); + noway_assert((dataReg != targetReg) || (targetReg == comparandReg)); - // Catch case we destroyed data or address before use - noway_assert(addrReg != targetReg); - noway_assert(dataReg != targetReg); - } GetEmitter()->emitIns_R_R_R(INS_casal, dataSize, targetReg, dataReg, addrReg); } else @@ -3506,7 +3496,7 @@ void CodeGen::genCkfinite(GenTree* treeNode) regNumber intReg = treeNode->GetSingleTempReg(); regNumber fpReg = genConsumeReg(op1); - emit->emitIns_R_R(ins_Copy(targetType), emitActualTypeSize(treeNode), intReg, fpReg); + inst_Mov(targetType, intReg, fpReg, /* canSkip */ false, emitActualTypeSize(treeNode)); emit->emitIns_R_R_I(INS_lsr, emitActualTypeSize(targetType), intReg, intReg, shiftAmount); // Mask of exponent with all 1's and check if the exponent is all 1's 
@@ -3517,10 +3507,8 @@ void CodeGen::genCkfinite(GenTree* treeNode) genJumpToThrowHlpBlk(EJ_eq, SCK_ARITH_EXCPN); // if it is a finite value copy it to targetReg - if (treeNode->GetRegNum() != fpReg) - { - emit->emitIns_R_R(ins_Copy(targetType), emitActualTypeSize(treeNode), treeNode->GetRegNum(), fpReg); - } + inst_Mov(targetType, treeNode->GetRegNum(), fpReg, /* canSkip */ true); + genProduceReg(treeNode); } @@ -4078,7 +4066,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) if (opt == INS_OPTS_1D) { - GetEmitter()->emitIns_R_R(INS_mov, attr, targetReg, op1Reg); + GetEmitter()->emitIns_Mov(INS_mov, attr, targetReg, op1Reg, /* canSkip */ false); } else if (genIsValidIntReg(op1Reg)) { @@ -4162,10 +4150,7 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) } // Load the initialized value. - if (targetReg != vectorReg) - { - GetEmitter()->emitIns_R_R(INS_mov, EA_16BYTE, targetReg, vectorReg); - } + GetEmitter()->emitIns_Mov(INS_mov, EA_16BYTE, targetReg, vectorReg, /* canSkip */ true); genProduceReg(simdNode); } @@ -4201,10 +4186,15 @@ void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType); emitAttr attr = (simdNode->GetSimdSize() > 8) ? EA_16BYTE : EA_8BYTE; - insOpts opt = (ins == INS_mov) ? 
INS_OPTS_NONE : genGetSimdInsOpt(attr, baseType); - - GetEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt); + if (GetEmitter()->IsMovInstruction(ins)) + { + GetEmitter()->emitIns_Mov(ins, attr, targetReg, op1Reg, /* canSkip */ false, INS_OPTS_NONE); + } + else + { + GetEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, genGetSimdInsOpt(attr, baseType)); + } genProduceReg(simdNode); } @@ -4578,7 +4568,7 @@ void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) emitAttr attr = emitTypeSize(baseType); // Insert mov if register assignment requires it - GetEmitter()->emitIns_R_R(INS_mov, EA_16BYTE, targetReg, op1Reg); + GetEmitter()->emitIns_Mov(INS_mov, EA_16BYTE, targetReg, op1Reg, /* canSkip */ false); if (genIsValidIntReg(op2Reg)) { @@ -5901,29 +5891,29 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R(INS_tst, EA_8BYTE, REG_R7, REG_R10); // mov reg, reg - theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R7, REG_R10); - theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R8, REG_SP); - theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_SP, REG_R9); + theEmitter->emitIns_Mov(INS_mov, EA_8BYTE, REG_R7, REG_R10, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_mov, EA_8BYTE, REG_R8, REG_SP, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_mov, EA_8BYTE, REG_SP, REG_R9, /* canSkip */ false); theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_R5, REG_R11); theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_R4, REG_R12); theEmitter->emitIns_R_R(INS_negs, EA_8BYTE, REG_R3, REG_R13); - theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R7, REG_R10); + theEmitter->emitIns_Mov(INS_mov, EA_4BYTE, REG_R7, REG_R10, /* canSkip */ false); theEmitter->emitIns_R_R(INS_mvn, EA_4BYTE, REG_R5, REG_R11); theEmitter->emitIns_R_R(INS_neg, EA_4BYTE, REG_R4, REG_R12); theEmitter->emitIns_R_R(INS_negs, EA_4BYTE, REG_R3, REG_R13); - theEmitter->emitIns_R_R(INS_sxtb, EA_8BYTE, REG_R7, REG_R10); - theEmitter->emitIns_R_R(INS_sxth, EA_8BYTE, REG_R5, REG_R11); - 
theEmitter->emitIns_R_R(INS_sxtw, EA_8BYTE, REG_R4, REG_R12); - theEmitter->emitIns_R_R(INS_uxtb, EA_8BYTE, REG_R3, REG_R13); // map to Wt - theEmitter->emitIns_R_R(INS_uxth, EA_8BYTE, REG_R2, REG_R14); // map to Wt + theEmitter->emitIns_Mov(INS_sxtb, EA_8BYTE, REG_R7, REG_R10, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_sxth, EA_8BYTE, REG_R5, REG_R11, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_sxtw, EA_8BYTE, REG_R4, REG_R12, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_uxtb, EA_8BYTE, REG_R3, REG_R13, /* canSkip */ false); // map to Wt + theEmitter->emitIns_Mov(INS_uxth, EA_8BYTE, REG_R2, REG_R14, /* canSkip */ false); // map to Wt - theEmitter->emitIns_R_R(INS_sxtb, EA_4BYTE, REG_R7, REG_R10); - theEmitter->emitIns_R_R(INS_sxth, EA_4BYTE, REG_R5, REG_R11); - theEmitter->emitIns_R_R(INS_uxtb, EA_4BYTE, REG_R3, REG_R13); - theEmitter->emitIns_R_R(INS_uxth, EA_4BYTE, REG_R2, REG_R14); + theEmitter->emitIns_Mov(INS_sxtb, EA_4BYTE, REG_R7, REG_R10, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_sxth, EA_4BYTE, REG_R5, REG_R11, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_uxtb, EA_4BYTE, REG_R3, REG_R13, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_uxth, EA_4BYTE, REG_R2, REG_R14, /* canSkip */ false); #endif // ALL_ARM64_EMITTER_UNIT_TESTS @@ -6980,24 +6970,24 @@ void CodeGen::genArm64EmitterUnitTests() // // mov vector to vector - theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V0, REG_V1); - theEmitter->emitIns_R_R(INS_mov, EA_16BYTE, REG_V2, REG_V3); + theEmitter->emitIns_Mov(INS_mov, EA_8BYTE, REG_V0, REG_V1, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_mov, EA_16BYTE, REG_V2, REG_V3, /* canSkip */ false); - theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V12, REG_V13); - theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V14, REG_V15); - theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V16, REG_V17); + theEmitter->emitIns_Mov(INS_mov, EA_4BYTE, REG_V12, REG_V13, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_mov, 
EA_2BYTE, REG_V14, REG_V15, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_mov, EA_1BYTE, REG_V16, REG_V17, /* canSkip */ false); // mov vector to general - theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R0, REG_V4); - theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R1, REG_V5); - theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_R2, REG_V6); - theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_R3, REG_V7); + theEmitter->emitIns_Mov(INS_mov, EA_8BYTE, REG_R0, REG_V4, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_mov, EA_4BYTE, REG_R1, REG_V5, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_mov, EA_2BYTE, REG_R2, REG_V6, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_mov, EA_1BYTE, REG_R3, REG_V7, /* canSkip */ false); // mov general to vector - theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V8, REG_R4); - theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V9, REG_R5); - theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V10, REG_R6); - theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V11, REG_R7); + theEmitter->emitIns_Mov(INS_mov, EA_8BYTE, REG_V8, REG_R4, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_mov, EA_4BYTE, REG_V9, REG_R5, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_mov, EA_2BYTE, REG_V10, REG_R6, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_mov, EA_1BYTE, REG_V11, REG_R7, /* canSkip */ false); // mov vector[index] to vector theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V0, REG_V1, 1); @@ -7221,22 +7211,22 @@ void CodeGen::genArm64EmitterUnitTests() // // fmov to vector to vector - theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V0, REG_V2); - theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V1, REG_V3); + theEmitter->emitIns_Mov(INS_fmov, EA_8BYTE, REG_V0, REG_V2, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_fmov, EA_4BYTE, REG_V1, REG_V3, /* canSkip */ false); // fmov to vector to general - theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R0, REG_V4); - theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R1, REG_V5); + 
theEmitter->emitIns_Mov(INS_fmov, EA_8BYTE, REG_R0, REG_V4, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_fmov, EA_4BYTE, REG_R1, REG_V5, /* canSkip */ false); // using the optional conversion specifier - theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_D_TO_8BYTE); - theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R3, REG_V7, INS_OPTS_S_TO_4BYTE); + theEmitter->emitIns_Mov(INS_fmov, EA_8BYTE, REG_R2, REG_V6, /* canSkip */ false, INS_OPTS_D_TO_8BYTE); + theEmitter->emitIns_Mov(INS_fmov, EA_4BYTE, REG_R3, REG_V7, /* canSkip */ false, INS_OPTS_S_TO_4BYTE); // fmov to general to vector - theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V8, REG_R4); - theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V9, REG_R5); + theEmitter->emitIns_Mov(INS_fmov, EA_8BYTE, REG_V8, REG_R4, /* canSkip */ false); + theEmitter->emitIns_Mov(INS_fmov, EA_4BYTE, REG_V9, REG_R5, /* canSkip */ false); // using the optional conversion specifier - theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V10, REG_R6, INS_OPTS_8BYTE_TO_D); - theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V11, REG_R7, INS_OPTS_4BYTE_TO_S); + theEmitter->emitIns_Mov(INS_fmov, EA_4BYTE, REG_V11, REG_R7, /* canSkip */ false, INS_OPTS_4BYTE_TO_S); + theEmitter->emitIns_Mov(INS_fmov, EA_8BYTE, REG_V10, REG_R6, /* canSkip */ false, INS_OPTS_8BYTE_TO_D); // fcmp/fcmpe theEmitter->emitIns_R_R(INS_fcmp, EA_8BYTE, REG_V8, REG_V16); diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 51ab100a33eac..a47503ef7aac8 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -1116,10 +1116,7 @@ void CodeGen::genPutArgReg(GenTreeOp* tree) genConsumeReg(op1); // If child node is not already in the register we need, move it - if (targetReg != op1->GetRegNum()) - { - inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); - } + inst_Mov(targetType, targetReg, op1->GetRegNum(), /* canSkip */ true); genProduceReg(tree); } @@ 
-1167,13 +1164,12 @@ void CodeGen::genCodeForBitCast(GenTreeOp* treeNode) else #endif // TARGET_ARM { - instruction ins = ins_Copy(srcReg, targetType); - inst_RV_RV(ins, targetReg, srcReg, targetType); + inst_Mov(targetType, targetReg, srcReg, /* canSkip */ false); } } else { - inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType); + inst_Mov(targetType, targetReg, genConsumeReg(op1), /* canSkip */ false); } } @@ -1231,10 +1227,8 @@ void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) // Handle the first INT, and then handle the 2nd below. assert(nextArgNode->OperIs(GT_BITCAST)); type = TYP_INT; - if (argReg != fieldReg) - { - inst_RV_RV(ins_Copy(type), argReg, fieldReg, type); - } + inst_Mov(type, argReg, fieldReg, /* canSkip */ true); + // Now set up the next register for the 2nd INT argReg = REG_NEXT(argReg); regIndex++; @@ -1244,10 +1238,8 @@ void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) #endif // TARGET_ARM // If child node is not already in the register we need, move it - if (argReg != fieldReg) - { - inst_RV_RV(ins_Copy(type), argReg, fieldReg, type); - } + inst_Mov(type, argReg, fieldReg, /* canSkip */ true); + regIndex++; } } @@ -1381,7 +1373,7 @@ void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) if (targetReg == addrReg && idx != treeNode->gtNumRegs - 1) { assert(targetReg != baseReg); - emit->emitIns_R_R(INS_mov, emitActualTypeSize(type), baseReg, addrReg); + emit->emitIns_Mov(INS_mov, emitActualTypeSize(type), baseReg, addrReg, /* canSkip */ false); addrReg = baseReg; } @@ -1523,11 +1515,8 @@ void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree) var_types targetType = tree->TypeGet(); regNumber targetReg = tree->GetRegNum(); - if (targetReg != tree->gtSrcReg) - { - inst_RV_RV(ins_Copy(targetType), targetReg, tree->gtSrcReg, targetType); - genTransferRegGCState(targetReg, tree->gtSrcReg); - } + inst_Mov(targetType, targetReg, tree->gtSrcReg, /* canSkip */ true); + genTransferRegGCState(targetReg, 
tree->gtSrcReg); genProduceReg(tree); } @@ -1688,10 +1677,7 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) else { regNumber indexReg = genConsumeReg(indexNode); - if (indexReg != tgtReg) - { - inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT); - } + inst_Mov(TYP_INT, tgtReg, indexReg, /* canSkip */ true); } genProduceReg(arrOffset); } @@ -1789,7 +1775,7 @@ void CodeGen::genCodeForLclFld(GenTreeLclFld* tree) { regNumber floatAsInt = tree->GetSingleTempReg(); emit->emitIns_R_R(INS_ldr, EA_4BYTE, floatAsInt, addr); - emit->emitIns_R_R(INS_vmov_i2f, EA_4BYTE, targetReg, floatAsInt); + emit->emitIns_Mov(INS_vmov_i2f, EA_4BYTE, targetReg, floatAsInt, /* canSkip */ false); } else { @@ -2335,11 +2321,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call) assert(putArgRegNode->gtOper == GT_PUTARG_REG); genConsumeReg(putArgRegNode); - - if (putArgRegNode->GetRegNum() != argReg) - { - inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), true), argReg, putArgRegNode->GetRegNum()); - } + inst_Mov_Extend(putArgRegNode->TypeGet(), /* srcInReg */ true, argReg, putArgRegNode->GetRegNum(), + /* canSkip */ true, emitActualTypeSize(TYP_I_IMPL)); argReg = genRegArgNext(argReg); @@ -2361,10 +2344,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call) { regNumber argReg = (regNumber)((unsigned)curArgTabEntry->GetRegNum() + idx); regNumber allocReg = argNode->AsPutArgSplit()->GetRegNumByIdx(idx); - if (argReg != allocReg) - { - inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), true), argReg, allocReg); - } + inst_Mov_Extend(argNode->TypeGet(), /* srcInReg */ true, argReg, allocReg, /* canSkip */ true, + emitActualTypeSize(TYP_I_IMPL)); } } #endif // FEATURE_ARG_SPLIT @@ -2372,10 +2353,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call) { regNumber argReg = curArgTabEntry->GetRegNum(); genConsumeReg(argNode); - if (argNode->GetRegNum() != argReg) - { - inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), true), argReg, argNode->GetRegNum()); - } + 
inst_Mov_Extend(argNode->TypeGet(), /* srcInReg */ true, argReg, argNode->GetRegNum(), /* canSkip */ true, + emitActualTypeSize(TYP_I_IMPL)); } } @@ -2431,10 +2410,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) genConsumeReg(target); // Use IP0 on ARM64 and R12 on ARM32 as the call target register. - if (target->GetRegNum() != REG_FASTTAILCALL_TARGET) - { - inst_RV_RV(INS_mov, REG_FASTTAILCALL_TARGET, target->GetRegNum()); - } + inst_Mov(TYP_I_IMPL, REG_FASTTAILCALL_TARGET, target->GetRegNum(), /* canSkip */ true); } return; @@ -2622,10 +2598,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) var_types regType = pRetTypeDesc->GetReturnRegType(i); returnReg = pRetTypeDesc->GetABIReturnReg(i); regNumber allocatedReg = call->GetRegNumByIdx(i); - if (returnReg != allocatedReg) - { - inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType); - } + inst_Mov(regType, allocatedReg, returnReg, /* canSkip */ true); } } else @@ -2661,12 +2634,12 @@ void CodeGen::genCallInstruction(GenTreeCall* call) } else if (compiler->opts.compUseSoftFP && returnType == TYP_FLOAT) { - inst_RV_RV(INS_vmov_i2f, call->GetRegNum(), returnReg, returnType); + inst_Mov(returnType, call->GetRegNum(), returnReg, /* canSkip */ false); } else #endif { - inst_RV_RV(ins_Copy(returnType), call->GetRegNum(), returnReg, returnType); + inst_Mov(returnType, call->GetRegNum(), returnReg, /* canSkip */ false); } } } @@ -3177,7 +3150,7 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) break; } - GetEmitter()->emitIns_R_R(ins, EA_ATTR(insSize), dstReg, srcReg); + GetEmitter()->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, /* canSkip */ false); } genProduceReg(cast); @@ -3228,9 +3201,10 @@ void CodeGen::genFloatToFloatCast(GenTree* treeNode) GetEmitter()->emitIns_R_R(insVcvt, emitTypeSize(treeNode), treeNode->GetRegNum(), op1->GetRegNum()); } - else if (treeNode->GetRegNum() != op1->GetRegNum()) + else { - GetEmitter()->emitIns_R_R(INS_vmov, emitTypeSize(treeNode), 
treeNode->GetRegNum(), op1->GetRegNum()); + GetEmitter()->emitIns_Mov(INS_vmov, emitTypeSize(treeNode), treeNode->GetRegNum(), op1->GetRegNum(), + /* canSkip */ true); } #elif defined(TARGET_ARM64) @@ -3243,10 +3217,11 @@ void CodeGen::genFloatToFloatCast(GenTree* treeNode) GetEmitter()->emitIns_R_R(INS_fcvt, emitActualTypeSize(treeNode), treeNode->GetRegNum(), op1->GetRegNum(), cvtOption); } - else if (treeNode->GetRegNum() != op1->GetRegNum()) + else { // If double to double cast or float to float cast. Emit a move instruction. - GetEmitter()->emitIns_R_R(INS_mov, emitActualTypeSize(treeNode), treeNode->GetRegNum(), op1->GetRegNum()); + GetEmitter()->emitIns_Mov(INS_mov, emitActualTypeSize(treeNode), treeNode->GetRegNum(), op1->GetRegNum(), + /* canSkip */ true); } #endif // TARGET* @@ -3591,10 +3566,7 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) } else // offset is zero { - if (lea->GetRegNum() != memBase->GetRegNum()) - { - emit->emitIns_R_R(INS_mov, size, lea->GetRegNum(), memBase->GetRegNum()); - } + emit->emitIns_Mov(INS_mov, size, lea->GetRegNum(), memBase->GetRegNum(), /* canSkip */ true); } } else diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 6e5c25dad2f7d..f6eaa8b044ccc 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4238,7 +4238,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType); - GetEmitter()->emitIns_R_R(insCopy, size, xtraReg, begRegNum); + GetEmitter()->emitIns_Mov(insCopy, size, xtraReg, begRegNum, /* canSkip */ false); regSet.verifyRegUsed(xtraReg); @@ -4255,7 +4255,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType); regNumber srcRegNum = genMapRegArgNumToRegNum(srcReg, destMemType); - GetEmitter()->emitIns_R_R(insCopy, size, destRegNum, 
srcRegNum); + GetEmitter()->emitIns_Mov(insCopy, size, destRegNum, srcRegNum, /* canSkip */ false); regSet.verifyRegUsed(destRegNum); @@ -4307,7 +4307,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType); - GetEmitter()->emitIns_R_R(insCopy, size, destRegNum, xtraReg); + GetEmitter()->emitIns_Mov(insCopy, size, destRegNum, xtraReg, /* canSkip */ false); regSet.verifyRegUsed(destRegNum); #ifdef USING_SCOPE_INFO @@ -4494,8 +4494,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere size = EA_8BYTE; } #endif - instruction copyIns = ins_Copy(regNum, destMemType); - GetEmitter()->emitIns_R_R(copyIns, size, destRegNum, regNum); + inst_Mov(destMemType, destRegNum, regNum, /* canSkip */ false, size); #ifdef USING_SCOPE_INFO psiMoveToReg(varNum); #endif // USING_SCOPE_INFO @@ -4551,7 +4550,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere noway_assert(regArgTab[nextArgNum].varNum == varNum); noway_assert(genIsValidFloatReg(nextRegNum)); noway_assert(genIsValidFloatReg(destRegNum)); - GetEmitter()->emitIns_R_R(INS_mov, EA_8BYTE, destRegNum, nextRegNum); + GetEmitter()->emitIns_Mov(INS_mov, EA_8BYTE, destRegNum, nextRegNum, /* canSkip */ false); } } #if defined(TARGET_ARM64) && defined(FEATURE_SIMD) @@ -5126,7 +5125,7 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& if (fltInitReg != REG_NA) { // Copy from float. - inst_RV_RV(ins_Copy(TYP_FLOAT), reg, fltInitReg, TYP_FLOAT); + inst_Mov(TYP_FLOAT, reg, fltInitReg, /* canSkip */ false); } else { @@ -5140,7 +5139,7 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& else { // Copy from int. 
- inst_RV_RV(INS_vmov_i2f, reg, initReg, TYP_FLOAT, EA_4BYTE); + inst_Mov(TYP_FLOAT, reg, initReg, /* canSkip */ false); } #elif defined(TARGET_XARCH) // XORPS is the fastest and smallest way to initialize a XMM register to zero. @@ -5161,7 +5160,7 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& if (dblInitReg != REG_NA) { // Copy from double. - inst_RV_RV(ins_Copy(TYP_DOUBLE), reg, dblInitReg, TYP_DOUBLE); + inst_Mov(TYP_DOUBLE, reg, dblInitReg, /* canSkip */ false); } else { @@ -5295,7 +5294,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) { // Restore sp from fp // mov sp, fp - inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE); + inst_Mov(TYP_I_IMPL, REG_SPBASE, REG_FPBASE, /* canSkip */ false); compiler->unwindSetFrameReg(REG_FPBASE, 0); } @@ -6908,7 +6907,7 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) if (delta == 0) { - GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE); + GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, /* canSkip */ false); #ifdef USING_SCOPE_INFO psiMoveESPtoEBP(); #endif // USING_SCOPE_INFO @@ -6939,7 +6938,7 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) if (delta == 0) { - GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE); + GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, /* canSkip */ false); } else { @@ -7443,7 +7442,7 @@ void CodeGen::genFnProlog() #ifdef TARGET_ARM if (compiler->compLocallocUsed) { - GetEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_SAVED_LOCALLOC_SP, REG_SPBASE); + GetEmitter()->emitIns_Mov(INS_mov, EA_4BYTE, REG_SAVED_LOCALLOC_SP, REG_SPBASE, /* canSkip */ false); regSet.verifyRegUsed(REG_SAVED_LOCALLOC_SP); compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0); } @@ -7734,11 +7733,8 @@ void CodeGen::genFnProlog() if (varDsc->lvIsInReg()) { - if (varDsc->GetRegNum() != REG_EAX) - { - 
GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, varDsc->GetRegNum(), REG_EAX); - regSet.verifyRegUsed(varDsc->GetRegNum()); - } + GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, varDsc->GetRegNum(), REG_EAX, /* canSkip */ true); + regSet.verifyRegUsed(varDsc->GetRegNum()); } else { @@ -7849,7 +7845,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) } // mov R9 into SP - inst_RV_RV(INS_mov, REG_SP, REG_SAVED_LOCALLOC_SP); + inst_Mov(TYP_I_IMPL, REG_SP, REG_SAVED_LOCALLOC_SP, /* canSkip */ false); compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0); } @@ -7886,7 +7882,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) regNumber vptrReg1 = REG_LR; instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr); - GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, vptrReg1, indCallReg); + GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, vptrReg1, indCallReg, /* canSkip */ false); GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0); GetEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, indCallReg, vptrReg1); } @@ -8331,7 +8327,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) if (needMovEspEbp) { // mov esp, ebp - inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE); + inst_Mov(TYP_I_IMPL, REG_SPBASE, REG_FPBASE, /* canSkip */ false); } #endif // !TARGET_AMD64 @@ -10963,14 +10959,9 @@ void CodeGen::genLongReturn(GenTree* treeNode) genConsumeReg(loRetVal); genConsumeReg(hiRetVal); - if (loRetVal->GetRegNum() != REG_LNGRET_LO) - { - inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->GetRegNum(), TYP_INT); - } - if (hiRetVal->GetRegNum() != REG_LNGRET_HI) - { - inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->GetRegNum(), TYP_INT); - } + + inst_Mov(targetType, REG_LNGRET_LO, loRetVal->GetRegNum(), /* canSkip */ true, emitActualTypeSize(TYP_INT)); + inst_Mov(targetType, REG_LNGRET_HI, hiRetVal->GetRegNum(), /* canSkip */ true, emitActualTypeSize(TYP_INT)); } #endif // TARGET_X86 || TARGET_ARM @@ -11042,7 +11033,8 @@ void 
CodeGen::genReturn(GenTree* treeNode) { if (targetType == TYP_FLOAT) { - GetEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->GetRegNum()); + GetEmitter()->emitIns_Mov(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->GetRegNum(), + /* canSkip */ false); } else { @@ -11055,10 +11047,7 @@ void CodeGen::genReturn(GenTree* treeNode) #endif // TARGET_ARM { regNumber retReg = varTypeUsesFloatReg(treeNode) ? REG_FLOATRET : REG_INTRET; - if (op1->GetRegNum() != retReg) - { - inst_RV_RV(ins_Move_Extend(targetType, true), retReg, op1->GetRegNum(), targetType); - } + inst_Mov_Extend(targetType, /* srcInReg */ true, retReg, op1->GetRegNum(), /* canSkip */ true); } #endif // !TARGET_ARM64 } @@ -11286,11 +11275,11 @@ void CodeGen::genStructReturn(GenTree* treeNode) assert(compiler->lvaGetDesc(fieldVarNum)->lvOnFrame); GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, fieldVarNum, 0); } - else if (fromReg != toReg) + else { // Note that ins_Copy(fromReg, type) will return the appropriate register to copy // between register files if needed. - inst_RV_RV(ins_Copy(fromReg, type), toReg, fromReg, type); + inst_Mov(type, toReg, fromReg, /* canSkip */ true); } } } @@ -11402,11 +11391,10 @@ void CodeGen::genMultiRegStoreToLocal(GenTreeLclVar* lclNode) if (varReg != REG_NA) { hasRegs = true; - if (varReg != reg) - { - // We may need a cross register-file copy here. - inst_RV_RV(ins_Copy(reg, destType), varReg, reg, destType); - } + + // We may need a cross register-file copy here. 
+ inst_Mov(destType, varReg, reg, /* canSkip */ true); + fieldVarDsc->SetRegNum(varReg); } else @@ -11543,7 +11531,7 @@ void CodeGen::genRegCopy(GenTree* treeNode) assert(targetReg != REG_NA); assert(targetType != TYP_STRUCT); - inst_RV_RV(ins_Copy(srcReg, targetType), targetReg, srcReg, targetType); + inst_Mov(targetType, targetReg, srcReg, /* canSkip */ false); if (op1->IsLocal()) { @@ -11634,7 +11622,7 @@ regNumber CodeGen::genRegCopy(GenTree* treeNode, unsigned multiRegIndex) unsigned fieldVarNum = parentVarDsc->lvFieldLclStart + multiRegIndex; LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(fieldVarNum); type = fieldVarDsc->TypeGet(); - inst_RV_RV(ins_Copy(type), targetReg, sourceReg, type); + inst_Mov(type, targetReg, sourceReg, /* canSkip */ false); if (!op1->AsLclVar()->IsLastUse(multiRegIndex) && fieldVarDsc->GetRegNum() != REG_STK) { // The old location is dying @@ -11654,7 +11642,7 @@ regNumber CodeGen::genRegCopy(GenTree* treeNode, unsigned multiRegIndex) else { type = op1->GetRegTypeByIndex(multiRegIndex); - inst_RV_RV(ins_Copy(type), targetReg, sourceReg, type); + inst_Mov(type, targetReg, sourceReg, /* canSkip */ false); // We never spill after a copy, so to produce the single register, we simply need to // update the GC info for the defined register. 
gcInfo.gcMarkRegPtrVal(targetReg, type); diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index f1120aafd54e2..d4bbf00562262 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1287,10 +1287,7 @@ void CodeGen::genCopyRegIfNeeded(GenTree* node, regNumber needReg) { assert((node->GetRegNum() != REG_NA) && (needReg != REG_NA)); assert(!node->isUsedFromSpillTemp()); - if (node->GetRegNum() != needReg) - { - inst_RV_RV(INS_mov, needReg, node->GetRegNum(), node->TypeGet()); - } + inst_Mov(node->TypeGet(), needReg, node->GetRegNum(), /* canSkip */ true); } // Do Liveness update for a subnodes that is being consumed by codegen @@ -1459,9 +1456,9 @@ regNumber CodeGen::genConsumeReg(GenTree* tree) { GenTreeLclVarCommon* lcl = tree->AsLclVarCommon(); LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()]; - if (varDsc->GetRegNum() != REG_STK && varDsc->GetRegNum() != tree->GetRegNum()) + if (varDsc->GetRegNum() != REG_STK) { - inst_RV_RV(ins_Copy(tree->TypeGet()), tree->GetRegNum(), varDsc->GetRegNum()); + inst_Mov(tree->TypeGet(), tree->GetRegNum(), varDsc->GetRegNum(), /* canSkip */ true); } } @@ -1759,7 +1756,7 @@ void CodeGen::genConsumePutStructArgStk(GenTreePutArgStk* putArgNode, #ifdef TARGET_X86 assert(dstReg != REG_SPBASE); - inst_RV_RV(INS_mov, dstReg, REG_SPBASE); + inst_Mov(TYP_I_IMPL, dstReg, REG_SPBASE, /* canSkip */ false); #else // !TARGET_X86 GenTree* dstAddr = putArgNode; if (dstAddr->GetRegNum() != dstReg) @@ -1772,25 +1769,22 @@ void CodeGen::genConsumePutStructArgStk(GenTreePutArgStk* putArgNode, } #endif // !TARGET_X86 - if (srcAddr->GetRegNum() != srcReg) + if (srcAddr->OperIsLocalAddr()) { - if (srcAddr->OperIsLocalAddr()) - { - // The OperLocalAddr is always contained. - assert(srcAddr->isContained()); - const GenTreeLclVarCommon* lclNode = srcAddr->AsLclVarCommon(); + // The OperLocalAddr is always contained. 
+ assert(srcAddr->isContained()); + const GenTreeLclVarCommon* lclNode = srcAddr->AsLclVarCommon(); - // Generate LEA instruction to load the LclVar address in RSI. - // Source is known to be on the stack. Use EA_PTRSIZE. - unsigned int offset = lclNode->GetLclOffs(); - GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, srcReg, lclNode->GetLclNum(), offset); - } - else - { - assert(srcAddr->GetRegNum() != REG_NA); - // Source is not known to be on the stack. Use EA_BYREF. - GetEmitter()->emitIns_R_R(INS_mov, EA_BYREF, srcReg, srcAddr->GetRegNum()); - } + // Generate LEA instruction to load the LclVar address in RSI. + // Source is known to be on the stack. Use EA_PTRSIZE. + unsigned int offset = lclNode->GetLclOffs(); + GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, srcReg, lclNode->GetLclNum(), offset); + } + else + { + assert(srcAddr->GetRegNum() != REG_NA); + // Source is not known to be on the stack. Use EA_BYREF. + GetEmitter()->emitIns_Mov(INS_mov, EA_BYREF, srcReg, srcAddr->GetRegNum(), /* canSkip */ true); } if (sizeReg != REG_NA) @@ -1910,10 +1904,7 @@ void CodeGen::genSetBlockSize(GenTreeBlk* blkNode, regNumber sizeReg) else { GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize; - if (sizeNode->GetRegNum() != sizeReg) - { - inst_RV_RV(INS_mov, sizeReg, sizeNode->GetRegNum(), sizeNode->TypeGet()); - } + inst_Mov(sizeNode->TypeGet(), sizeReg, sizeNode->GetRegNum(), /* canSkip */ true); } } } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 37adbf75707f3..6a468c3b6965a 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -276,7 +276,7 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block) (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT))) { #ifndef UNIX_X86_ABI - inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL); + inst_Mov(TYP_I_IMPL, REG_ARG_0, REG_SPBASE, /* canSkip */ false); #endif // !UNIX_X86_ABI } else @@ -559,10 +559,7 @@ void 
CodeGen::genCodeForNegNot(GenTree* tree) assert(operand->isUsedFromReg()); regNumber operandReg = genConsumeReg(operand); - if (operandReg != targetReg) - { - inst_RV_RV(INS_mov, targetReg, operandReg, targetType); - } + inst_Mov(targetType, targetReg, operandReg, /* canSkip */ true); instruction ins = genGetInsForOper(tree->OperGet(), targetType); inst_RV(ins, targetReg, targetType); @@ -592,10 +589,7 @@ void CodeGen::genCodeForBswap(GenTree* tree) assert(operand->isUsedFromReg()); regNumber operandReg = genConsumeReg(operand); - if (operandReg != targetReg) - { - inst_RV_RV(INS_mov, targetReg, operandReg, targetType); - } + inst_Mov(targetType, targetReg, operandReg, /* canSkip */ true); if (tree->OperIs(GT_BSWAP)) { @@ -641,10 +635,7 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) assert(regOp->isUsedFromReg()); // Setup targetReg when neither of the source operands was a matching register - if (regOp->GetRegNum() != REG_RAX) - { - inst_RV_RV(ins_Copy(targetType), REG_RAX, regOp->GetRegNum(), targetType); - } + inst_Mov(targetType, REG_RAX, regOp->GetRegNum(), /* canSkip */ true); instruction ins; if ((treeNode->gtFlags & GTF_UNSIGNED) == 0) @@ -658,9 +649,9 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) emit->emitInsBinary(ins, size, treeNode, rmOp); // Move the result to the desired register, if necessary - if (treeNode->OperGet() == GT_MULHI && targetReg != REG_RDX) + if (treeNode->OperGet() == GT_MULHI) { - inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType); + inst_Mov(targetType, targetReg, REG_RDX, /* canSkip */ true); } genProduceReg(treeNode); @@ -731,11 +722,11 @@ void CodeGen::genCodeForLongUMod(GenTreeOp* node) // div divisor->GetRegNum() // mov eax, temp const regNumber tempReg = node->GetSingleTempReg(); - inst_RV_RV(INS_mov, tempReg, REG_EAX, TYP_INT); - inst_RV_RV(INS_mov, REG_EAX, REG_EDX, TYP_INT); + inst_Mov(TYP_INT, tempReg, REG_EAX, /* canSkip */ false); + inst_Mov(TYP_INT, REG_EAX, REG_EDX, /* canSkip */ false); 
instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX); inst_RV(INS_div, divisor->GetRegNum(), TYP_INT); - inst_RV_RV(INS_mov, REG_EAX, tempReg, TYP_INT); + inst_Mov(TYP_INT, REG_EAX, tempReg, /* canSkip */ false); // noOverflow: // div divisor->GetRegNum() @@ -743,10 +734,7 @@ void CodeGen::genCodeForLongUMod(GenTreeOp* node) inst_RV(INS_div, divisor->GetRegNum(), TYP_INT); const regNumber targetReg = node->GetRegNum(); - if (targetReg != REG_EDX) - { - inst_RV_RV(INS_mov, targetReg, REG_RDX, TYP_INT); - } + inst_Mov(TYP_INT, targetReg, REG_RDX, /* canSkip */ true); genProduceReg(node); } #endif // TARGET_X86 @@ -818,18 +806,12 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode) // Move the result to the desired register, if necessary if (oper == GT_DIV || oper == GT_UDIV) { - if (targetReg != REG_RAX) - { - inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); - } + inst_Mov(targetType, targetReg, REG_RAX, /* canSkip */ true); } else { assert((oper == GT_MOD) || (oper == GT_UMOD)); - if (targetReg != REG_RDX) - { - inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType); - } + inst_Mov(targetType, targetReg, REG_RDX, /* canSkip */ true); } genProduceReg(treeNode); } @@ -952,7 +934,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode) else { var_types op1Type = op1->TypeGet(); - inst_RV_RV(ins_Copy(op1Type), targetReg, op1reg, op1Type); + inst_Mov(op1Type, targetReg, op1reg, /* canSkip */ false); regSet.verifyRegUsed(targetReg); gcInfo.gcMarkRegPtrVal(targetReg, op1Type); dst = treeNode; @@ -1062,11 +1044,9 @@ void CodeGen::genCodeForMul(GenTreeOp* treeNode) uint64_t zextImm = static_cast(static_cast(imm)); unsigned int shiftAmount = genLog2(zextImm); - if (targetReg != rmOp->GetRegNum()) - { - // Copy reg src to dest register - inst_RV_RV(INS_mov, targetReg, rmOp->GetRegNum(), targetType); - } + // Copy reg src to dest register + inst_Mov(targetType, targetReg, rmOp->GetRegNum(), /* canSkip */ true); + inst_RV_SH(INS_shl, size, targetReg, shiftAmount); } else @@ -1103,17 
+1083,14 @@ void CodeGen::genCodeForMul(GenTreeOp* treeNode) assert(regOp->isUsedFromReg()); // Setup targetReg when neither of the source operands was a matching register - if (regOp->GetRegNum() != mulTargetReg) - { - inst_RV_RV(INS_mov, mulTargetReg, regOp->GetRegNum(), targetType); - } + inst_Mov(targetType, mulTargetReg, regOp->GetRegNum(), /* canSkip */ true); emit->emitInsBinary(ins, size, treeNode, rmOp); // Move the result to the desired register, if necessary - if ((ins == INS_mulEAX) && (targetReg != REG_RAX)) + if (ins == INS_mulEAX) { - inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); + inst_Mov(targetType, targetReg, REG_RAX, /* canSkip */ true); } } @@ -1163,7 +1140,7 @@ void CodeGen::genSIMDSplitReturn(GenTree* src, ReturnTypeDesc* retTypeDesc) // Operand reg is different from return regs. // Copy opReg to reg0 and let it to be handled by one of the // two cases below. - inst_RV_RV(ins_Copy(opReg, TYP_SIMD16), reg0, opReg, TYP_SIMD16); + inst_Mov(TYP_SIMD16, reg0, opReg, /* canSkip */ false); opReg = reg0; } @@ -1171,15 +1148,13 @@ void CodeGen::genSIMDSplitReturn(GenTree* src, ReturnTypeDesc* retTypeDesc) { assert(opReg != reg1); // reg1 = opReg. - inst_RV_RV(ins_Copy(opReg, TYP_SIMD16), reg1, opReg, TYP_SIMD16); + inst_Mov(TYP_SIMD16, reg1, opReg, /* canSkip */ false); } else { assert(opReg == reg1); - // reg0 = opReg. - - inst_RV_RV(ins_Copy(opReg, TYP_SIMD16), reg0, opReg, TYP_SIMD16); + inst_Mov(TYP_SIMD16, reg0, opReg, /* canSkip */ false); } // reg0 - already has required 8-byte in bit position [63:0]. // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0]. 
@@ -1190,7 +1165,7 @@ void CodeGen::genSIMDSplitReturn(GenTree* src, ReturnTypeDesc* retTypeDesc) assert(srcIsFloatReg != dstIsFloatReg); assert((reg0 == REG_EAX) && (reg1 == REG_EDX)); // reg0 = opReg[31:0] - inst_RV_RV(ins_Copy(opReg, TYP_INT), reg0, opReg, TYP_INT); + inst_Mov(TYP_INT, reg0, opReg, /* canSkip */ false); // reg1 = opRef[61:32] if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41)) { @@ -1200,7 +1175,7 @@ void CodeGen::genSIMDSplitReturn(GenTree* src, ReturnTypeDesc* retTypeDesc) { int8_t shuffleMask = 1; // we only need [61:32]->[31:0], the rest is not read. inst_RV_TT_IV(INS_pshufd, EA_8BYTE, opReg, src, shuffleMask); - inst_RV_RV(ins_Copy(opReg, TYP_INT), reg1, opReg, TYP_INT); + inst_Mov(TYP_INT, reg1, opReg, /* canSkip */ false); } #endif // TARGET_X86 } @@ -1387,7 +1362,7 @@ void CodeGen::inst_SETCC(GenCondition condition, var_types type, regNumber dstRe if (!varTypeIsByte(type)) { - GetEmitter()->emitIns_R_R(INS_movzx, EA_1BYTE, dstReg, dstReg); + GetEmitter()->emitIns_Mov(INS_movzx, EA_1BYTE, dstReg, dstReg, /* canSkip */ false); } } @@ -1954,7 +1929,7 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode) { // targetReg = reg0; // targetReg[127:64] = reg1[127:64] - inst_RV_RV(ins_Copy(TYP_DOUBLE), targetReg, reg0, TYP_DOUBLE); + inst_Mov(TYP_DOUBLE, targetReg, reg0, /* canSkip */ false); inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg1, 0x00); } else if (targetReg == reg0) @@ -2080,7 +2055,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni } else { - GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG); + GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG, /* canSkip */ false); } #else // !TARGET_X86 static_assert_no_msg((RBM_STACK_PROBE_HELPER_ARG & (RBM_SECRET_STUB_PARAM | RBM_DEFAULT_HELPER_CALL_TARGET)) == @@ -2098,7 +2073,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* 
pIni static_assert_no_msg((RBM_STACK_PROBE_HELPER_TRASH & RBM_STACK_PROBE_HELPER_ARG) == RBM_NONE); - GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG); + GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG, /* canSkip */ false); #endif // !TARGET_X86 compiler->unwindAllocStack(frameSize); @@ -2145,9 +2120,9 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm // TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require // creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't // track". - inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL); + inst_Mov(TYP_I_IMPL, regTmp, REG_SPBASE, /* canSkip */ false); inst_RV_IV(INS_sub, regTmp, (target_ssize_t)-spDelta, EA_PTRSIZE); - inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL); + inst_Mov(TYP_I_IMPL, REG_SPBASE, regTmp, /* canSkip */ false); } else #endif // TARGET_X86 @@ -2285,15 +2260,15 @@ void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, re // Subtract a page from ESP. This is a trick to avoid the emitter trying to track the // decrement of the ESP - we do the subtraction in another reg instead of adjusting ESP directly. 
- inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL); + inst_Mov(TYP_I_IMPL, regTmp, REG_SPBASE, /* canSkip */ false); inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE); - inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL); + inst_Mov(TYP_I_IMPL, REG_SPBASE, regTmp, /* canSkip */ false); inst_RV_RV(INS_cmp, REG_SPBASE, regSpDelta, TYP_I_IMPL); inst_JMP(EJ_jae, loop); // Move the final value to ESP - inst_RV_RV(INS_mov, REG_SPBASE, regSpDelta); + inst_Mov(TYP_I_IMPL, REG_SPBASE, regSpDelta, /* canSkip */ false); } //------------------------------------------------------------------------ @@ -2377,11 +2352,9 @@ void CodeGen::genLclHeap(GenTree* tree) else { regCnt = tree->ExtractTempReg(); - if (regCnt != targetReg) - { - // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary. - inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet()); - } + + // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary. + inst_Mov(size->TypeGet(), regCnt, targetReg, /* canSkip */ true); } // Round up the number of bytes to allocate to a STACK_ALIGN boundary. This is done @@ -2774,7 +2747,7 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) } else { - emit->emitIns_R_R(INS_movd, EA_PTRSIZE, srcXmmReg, srcIntReg); + emit->emitIns_Mov(INS_movd, EA_PTRSIZE, srcXmmReg, srcIntReg, /* canSkip */ false); emit->emitIns_R_R(INS_punpckldq, EA_16BYTE, srcXmmReg, srcXmmReg); #ifdef TARGET_X86 // For x86, we need one more to convert it from 8 bytes to 16 bytes. @@ -3653,14 +3626,11 @@ void CodeGen::genLockedInstructions(GenTreeOp* node) genConsumeOperands(node); - if (node->GetRegNum() != data->GetRegNum()) - { - // If the destination register is different from the data register then we need - // to first move the data to the target register. Make sure we don't overwrite - // the address, the register allocator should have taken care of this. 
- assert(node->GetRegNum() != addr->GetRegNum()); - GetEmitter()->emitIns_R_R(INS_mov, size, node->GetRegNum(), data->GetRegNum()); - } + // If the destination register is different from the data register then we need + // to first move the data to the target register. Make sure we don't overwrite + // the address, the register allocator should have taken care of this. + assert((node->GetRegNum() != addr->GetRegNum()) || (node->GetRegNum() == data->GetRegNum())); + GetEmitter()->emitIns_Mov(INS_mov, size, node->GetRegNum(), data->GetRegNum(), /* canSkip */ true); instruction ins = node->OperIs(GT_XADD) ? INS_xadd : INS_xchg; @@ -3701,10 +3671,7 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* tree) // comparand goes to RAX; // Note that we must issue this move after the genConsumeRegs(), in case any of the above // have a GT_COPY from RAX. - if (comparand->GetRegNum() != REG_RAX) - { - inst_RV_RV(ins_Copy(comparand->TypeGet()), REG_RAX, comparand->GetRegNum(), comparand->TypeGet()); - } + inst_Mov(comparand->TypeGet(), REG_RAX, comparand->GetRegNum(), /* canSkip */ true); // location is Rm instGen(INS_lock); @@ -3712,10 +3679,7 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* tree) GetEmitter()->emitIns_AR_R(INS_cmpxchg, emitTypeSize(targetType), value->GetRegNum(), location->GetRegNum(), 0); // Result is in RAX - if (targetReg != REG_RAX) - { - inst_RV_RV(ins_Copy(targetType), targetReg, REG_RAX, targetType); - } + inst_Mov(targetType, targetReg, REG_RAX, /* canSkip */ true); genProduceReg(tree); } @@ -3810,11 +3774,8 @@ void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree) var_types targetType = tree->TypeGet(); regNumber targetReg = tree->GetRegNum(); - if (targetReg != tree->gtSrcReg) - { - inst_RV_RV(ins_Copy(targetType), targetReg, tree->gtSrcReg, targetType); - genTransferRegGCState(targetReg, tree->gtSrcReg); - } + inst_Mov(targetType, targetReg, tree->gtSrcReg, /* canSkip */ true); + genTransferRegGCState(targetReg, tree->gtSrcReg); 
genProduceReg(tree); } @@ -3901,10 +3862,7 @@ void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) // Subtract the lower bound for this dimension. // TODO-XArch-CQ: make this contained if it's an immediate that fits. - if (tgtReg != indexReg) - { - inst_RV_RV(INS_mov, tgtReg, indexReg, indexNode->TypeGet()); - } + inst_Mov(indexNode->TypeGet(), tgtReg, indexReg, /* canSkip */ true); GetEmitter()->emitIns_R_AR(INS_sub, emitActualTypeSize(TYP_INT), tgtReg, arrReg, genOffsetOfMDArrayLowerBound(elemType, rank, dim)); GetEmitter()->emitIns_R_AR(INS_cmp, emitActualTypeSize(TYP_INT), tgtReg, arrReg, @@ -3989,19 +3947,13 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) } else { - if (indexReg != tgtReg) - { - inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_I_IMPL); - } + inst_Mov(TYP_I_IMPL, tgtReg, indexReg, /* canSkip */ true); inst_RV_RV(INS_add, tgtReg, tmpReg); } } else { - if (indexReg != tgtReg) - { - inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT); - } + inst_Mov(TYP_INT, tgtReg, indexReg, /* canSkip */ true); } genProduceReg(arrOffset); } @@ -4151,10 +4103,7 @@ void CodeGen::genCodeForShift(GenTree* tree) // First, move the operand to the destination register and // later on perform the shift in-place. // (LSRA will try to avoid this situation through preferencing.) 
- if (tree->GetRegNum() != operandReg) - { - inst_RV_RV(INS_mov, tree->GetRegNum(), operandReg, targetType); - } + inst_Mov(targetType, tree->GetRegNum(), operandReg, /* canSkip */ true); inst_RV_SH(ins, size, tree->GetRegNum(), shiftByValue); } } @@ -4168,10 +4117,7 @@ void CodeGen::genCodeForShift(GenTree* tree) // The operand to be shifted must not be in ECX noway_assert(operandReg != REG_RCX); - if (tree->GetRegNum() != operandReg) - { - inst_RV_RV(INS_mov, tree->GetRegNum(), operandReg, targetType); - } + inst_Mov(targetType, tree->GetRegNum(), operandReg, /* canSkip */ true); inst_RV_CL(ins, tree->GetRegNum(), targetType); } @@ -4225,10 +4171,7 @@ void CodeGen::genCodeForShiftLong(GenTree* tree) regNumber regResult = (oper == GT_LSH_HI) ? regHi : regLo; - if (regResult != tree->GetRegNum()) - { - inst_RV_RV(INS_mov, tree->GetRegNum(), regResult, targetType); - } + inst_Mov(targetType, tree->GetRegNum(), regResult, /* canSkip */ true); if (oper == GT_LSH_HI) { @@ -4535,10 +4478,12 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) assert((op1->GetRegNum() == REG_NA) && op1->OperIsConst()); genSetRegToConst(targetReg, targetType, op1); } - else if (op1->GetRegNum() != targetReg) + else { + assert(targetReg == lclNode->GetRegNum()); assert(op1->GetRegNum() != REG_NA); - emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(targetType), lclNode, op1); + inst_Mov_Extend(targetType, /* srcInReg */ true, targetReg, op1->GetRegNum(), /* canSkip */ true, + emitTypeSize(targetType)); } } if (targetReg != REG_NA) @@ -4600,7 +4545,7 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) if (index->TypeGet() != TYP_I_IMPL) { // LEA needs 64-bit operands so we need to widen the index if it's TYP_INT. 
- GetEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, tmpReg, indexReg); + GetEmitter()->emitIns_Mov(INS_mov, EA_4BYTE, tmpReg, indexReg, /* canSkip */ false); indexReg = tmpReg; } #endif // TARGET_64BIT @@ -5043,10 +4988,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // Validate the putArgRegNode has the right type. assert(varTypeUsesFloatReg(putArgRegNode->TypeGet()) == genIsValidFloatReg(argReg)); - if (putArgRegNode->GetRegNum() != argReg) - { - inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), false), argReg, putArgRegNode->GetRegNum()); - } + inst_Mov_Extend(putArgRegNode->TypeGet(), /* srcInReg */ false, argReg, putArgRegNode->GetRegNum(), + /* canSkip */ true, emitActualTypeSize(TYP_I_IMPL)); } } else @@ -5054,10 +4997,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call) { regNumber argReg = curArgTabEntry->GetRegNum(); genConsumeReg(argNode); - if (argNode->GetRegNum() != argReg) - { - inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), false), argReg, argNode->GetRegNum()); - } + inst_Mov_Extend(argNode->TypeGet(), /* srcInReg */ false, argReg, argNode->GetRegNum(), /* canSkip */ true, + emitActualTypeSize(TYP_I_IMPL)); } #if FEATURE_VARARG @@ -5069,7 +5010,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) { regNumber srcReg = argNode->GetRegNum(); regNumber targetReg = compiler->getCallArgIntRegister(argNode->GetRegNum()); - inst_RV_RV(ins_Copy(srcReg, TYP_LONG), targetReg, srcReg); + inst_Mov(TYP_LONG, targetReg, srcReg, /* canSkip */ false, emitActualTypeSize(TYP_I_IMPL)); } #endif // FEATURE_VARARG } @@ -5445,10 +5386,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) var_types regType = retTypeDesc->GetReturnRegType(i); returnReg = retTypeDesc->GetABIReturnReg(i); regNumber allocatedReg = call->GetRegNumByIdx(i); - if (returnReg != allocatedReg) - { - inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType); - } + inst_Mov(regType, allocatedReg, returnReg, /* canSkip */ true); } #ifdef FEATURE_SIMD @@ -5487,10 +5425,7 @@ void 
CodeGen::genCallInstruction(GenTreeCall* call) returnReg = REG_INTRET; } - if (call->GetRegNum() != returnReg) - { - inst_RV_RV(ins_Copy(returnType), call->GetRegNum(), returnReg, returnType); - } + inst_Mov(returnType, call->GetRegNum(), returnReg, /* canSkip */ true); } genProduceReg(call); @@ -5515,7 +5450,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // ECX is trashed, so can be used to compute the expected SP. We saved the value of SP // after pushing all the stack arguments, but the caller popped the arguments, so we need // to do some math to figure a good comparison. - GetEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_ARG_0, REG_SPBASE); + GetEmitter()->emitIns_Mov(INS_mov, EA_4BYTE, REG_ARG_0, REG_SPBASE, /* canSkip */ false); GetEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_ARG_0, stackArgBytes); GetEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_ARG_0, compiler->lvaCallSpCheck, 0); } @@ -5812,7 +5747,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) if (varTypeIsFloating(loadType)) { intArgReg = compiler->getCallArgIntRegister(argReg); - inst_RV_RV(ins_Copy(argReg, TYP_LONG), intArgReg, argReg, loadType); + inst_Mov(TYP_LONG, intArgReg, argReg, /* canSkip */ false, emitActualTypeSize(loadType)); } else { @@ -5864,7 +5799,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) // also load it in corresponding float arg reg regNumber floatReg = compiler->getCallArgFloatRegister(argReg); - inst_RV_RV(ins_Copy(argReg, TYP_DOUBLE), floatReg, argReg); + inst_Mov(TYP_DOUBLE, floatReg, argReg, /* canSkip */ false, emitActualTypeSize(TYP_I_IMPL)); } argOffset += REGSIZE_BYTES; @@ -6016,10 +5951,7 @@ void CodeGen::genCompareInt(GenTree* treeNode) // Morph/Lowering are responsible to rotate "00" so we won't handle it here. 
if ((targetReg != REG_NA) && tree->OperIs(GT_LT, GT_GE) && !tree->IsUnsigned()) { - if (targetReg != op1->GetRegNum()) - { - inst_RV_RV(INS_mov, targetReg, op1->GetRegNum(), op1->TypeGet()); - } + inst_Mov(op1->TypeGet(), targetReg, op1->GetRegNum(), /* canSkip */ true); if (tree->OperIs(GT_GE)) { // emit "not" for "x>=0" case @@ -6178,10 +6110,7 @@ void CodeGen::genLongToIntCast(GenTree* cast) } } - if (dstReg != loSrcReg) - { - inst_RV_RV(INS_mov, dstReg, loSrcReg, TYP_INT, EA_4BYTE); - } + inst_Mov(TYP_INT, dstReg, loSrcReg, /* canSkip */ true); genProduceReg(cast); } @@ -6212,7 +6141,7 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d // upper 32 bits are zero. This requires a temporary register. const regNumber tempReg = cast->GetSingleTempReg(); assert(tempReg != reg); - GetEmitter()->emitIns_R_R(INS_mov, EA_8BYTE, tempReg, reg); + GetEmitter()->emitIns_Mov(INS_mov, EA_8BYTE, tempReg, reg, /* canSkip */ false); GetEmitter()->emitIns_R_I(INS_shr_N, EA_8BYTE, tempReg, 32); genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); } @@ -6281,56 +6210,41 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) genIntCastOverflowCheck(cast, desc, srcReg); } - if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg)) - { - instruction ins; - unsigned insSize; - bool canSkip = false; + instruction ins; + unsigned insSize; + bool canSkip = false; - switch (desc.ExtendKind()) - { - case GenIntCastDesc::ZERO_EXTEND_SMALL_INT: - ins = INS_movzx; - insSize = desc.ExtendSrcSize(); - break; - case GenIntCastDesc::SIGN_EXTEND_SMALL_INT: - ins = INS_movsx; - insSize = desc.ExtendSrcSize(); - break; + switch (desc.ExtendKind()) + { + case GenIntCastDesc::ZERO_EXTEND_SMALL_INT: + ins = INS_movzx; + insSize = desc.ExtendSrcSize(); + break; + case GenIntCastDesc::SIGN_EXTEND_SMALL_INT: + ins = INS_movsx; + insSize = desc.ExtendSrcSize(); + break; #ifdef TARGET_64BIT - case GenIntCastDesc::ZERO_EXTEND_INT: - // We can skip emitting this zero extending 
move if the previous instruction zero extended implicitly - if ((srcReg == dstReg) && compiler->opts.OptimizationEnabled()) - { - canSkip = emit->AreUpper32BitsZero(srcReg); - } - - ins = INS_mov; - insSize = 4; - break; - case GenIntCastDesc::SIGN_EXTEND_INT: - ins = INS_movsxd; - insSize = 4; - break; + case GenIntCastDesc::ZERO_EXTEND_INT: + ins = INS_mov; + insSize = 4; + canSkip = compiler->opts.OptimizationEnabled() && emit->AreUpper32BitsZero(srcReg); + break; + case GenIntCastDesc::SIGN_EXTEND_INT: + ins = INS_movsxd; + insSize = 4; + break; #endif - default: - assert(desc.ExtendKind() == GenIntCastDesc::COPY); - assert(srcReg != dstReg); - ins = INS_mov; - insSize = desc.ExtendSrcSize(); - break; - } - - if (canSkip) - { - JITDUMP("\n -- suppressing emission as previous instruction already properly extends.\n"); - } - else - { - emit->emitIns_R_R(ins, EA_ATTR(insSize), dstReg, srcReg); - } + default: + assert(desc.ExtendKind() == GenIntCastDesc::COPY); + ins = INS_mov; + insSize = desc.ExtendSrcSize(); + canSkip = true; + break; } + emit->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, canSkip); + genProduceReg(cast); } @@ -6619,7 +6533,7 @@ void CodeGen::genCkfinite(GenTree* treeNode) // bits of the integer register. regNumber srcReg = op1->GetRegNum(); var_types targetIntType = ((targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG); - inst_RV_RV(ins_Copy(srcReg, targetIntType), tmpReg, srcReg, targetType); + inst_Mov(targetIntType, tmpReg, srcReg, /* canSkip */ false, emitActualTypeSize(targetType)); if (targetType == TYP_DOUBLE) { // right shift by 32 bits to get to exponent. 
@@ -6634,10 +6548,7 @@ void CodeGen::genCkfinite(GenTree* treeNode) genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN); // if it is a finite value copy it to targetReg - if (targetReg != op1->GetRegNum()) - { - inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); - } + inst_Mov(targetType, targetReg, op1->GetRegNum(), /* canSkip */ true); #else // !TARGET_64BIT @@ -6674,10 +6585,7 @@ void CodeGen::genCkfinite(GenTree* treeNode) if (targetType == TYP_DOUBLE) { - if (targetReg != op1->GetRegNum()) - { - inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); - } + inst_Mov(targetType, targetReg, op1->GetRegNum(), /* canSkip */ true); inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, (int8_t)0xb1); copyToTmpSrcReg = targetReg; } @@ -6688,7 +6596,7 @@ void CodeGen::genCkfinite(GenTree* treeNode) // Copy only the low 32 bits. This will be the high order 32 bits of the floating-point // value, no matter the floating-point type. - inst_RV_RV(ins_Copy(copyToTmpSrcReg, TYP_INT), tmpReg, copyToTmpSrcReg, TYP_FLOAT); + inst_Mov(TYP_INT, tmpReg, copyToTmpSrcReg, /* canSkip */ false, emitActualTypeSize(TYP_FLOAT)); // Mask exponent with all 1's and check if the exponent is all 1's inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE); @@ -6697,17 +6605,17 @@ void CodeGen::genCkfinite(GenTree* treeNode) // If exponent is all 1's, throw ArithmeticException genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN); - if (targetReg != op1->GetRegNum()) + if ((targetType == TYP_DOUBLE) && (targetReg == op1->GetRegNum())) + { + // We need to re-shuffle the targetReg to get the correct result. + inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, (int8_t)0xb1); + } + else { // In both the TYP_FLOAT and TYP_DOUBLE case, the op1 register is untouched, // so copy it to the targetReg. This is faster and smaller for TYP_DOUBLE // than re-shuffling the targetReg. 
- inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); - } - else if (targetType == TYP_DOUBLE) - { - // We need to re-shuffle the targetReg to get the correct result. - inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, (int8_t)0xb1); + inst_Mov(targetType, targetReg, op1->GetRegNum(), /* canSkip */ true); } #endif // !TARGET_64BIT @@ -7105,16 +7013,11 @@ void CodeGen::genBitCast(var_types targetType, regNumber targetReg, var_types sr { const bool srcFltReg = varTypeUsesFloatReg(srcType) || varTypeIsSIMD(srcType); assert(srcFltReg == genIsValidFloatReg(srcReg)); + const bool dstFltReg = varTypeUsesFloatReg(targetType) || varTypeIsSIMD(targetType); assert(dstFltReg == genIsValidFloatReg(targetReg)); - if (srcFltReg != dstFltReg) - { - inst_RV_RV(ins_Copy(srcReg, targetType), targetReg, srcReg, targetType); - } - else if (targetReg != srcReg) - { - inst_RV_RV(ins_Copy(targetType), targetReg, srcReg, targetType); - } + + inst_Mov(targetType, targetReg, srcReg, /* canSkip */ true); } //---------------------------------------------------------------------- @@ -7515,7 +7418,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk) noway_assert((genRegMask(intTmpReg) & RBM_BYTE_REGS) != 0); if (argReg != REG_NA) { - inst_RV_RV(INS_mov, intTmpReg, argReg, fieldType); + inst_Mov(fieldType, intTmpReg, argReg, /* canSkip */ false); argReg = intTmpReg; } } @@ -7736,10 +7639,7 @@ void CodeGen::genPutArgReg(GenTreeOp* tree) genConsumeReg(op1); // If child node is not already in the register we need, move it - if (targetReg != op1->GetRegNum()) - { - inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); - } + inst_Mov(targetType, targetReg, op1->GetRegNum(), /* canSkip */ true); genProduceReg(tree); } @@ -8773,7 +8673,7 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType)) { regNumber intArgReg = 
compiler->getCallArgIntRegister(argReg); - inst_RV_RV(ins_Copy(argReg, TYP_LONG), intArgReg, argReg, loadType); + inst_Mov(TYP_LONG, intArgReg, argReg, /* canSkip */ false, emitActualTypeSize(loadType)); } #endif // FEATURE_VARARG } diff --git a/src/coreclr/jit/emitarm.cpp b/src/coreclr/jit/emitarm.cpp index 685c17f929381..6953df5b49a7c 100644 --- a/src/coreclr/jit/emitarm.cpp +++ b/src/coreclr/jit/emitarm.cpp @@ -1219,6 +1219,35 @@ emitter::insSize emitter::emitInsSize(insFormat insFmt) return ISZ_48BIT; } +//------------------------------------------------------------------------ +// IsMovInstruction: Determines whether a given instruction is a move instruction +// +// Arguments: +// ins -- The instruction being checked +// +bool emitter::IsMovInstruction(instruction ins) +{ + switch (ins) + { + case INS_mov: + case INS_sxtb: + case INS_sxth: + case INS_uxtb: + case INS_uxth: + case INS_vmov: + case INS_vmov_i2f: + case INS_vmov_f2i: + { + return true; + } + + default: + { + return false; + } + } +} + /***************************************************************************** * * isModImmConst() returns true when immediate 'val32' can be encoded @@ -2003,6 +2032,174 @@ void emitter::emitIns_MovRelocatableImmediate(instruction ins, emitAttr attr, re appendToCurIG(id); } +//------------------------------------------------------------------------ +// emitIns_Mov: Emits a move instruction +// +// Arguments: +// ins -- The instruction being emitted +// attr -- The emit attribute +// dstReg -- The destination register +// srcReg -- The source register +// canSkip -- true if the move can be elided when dstReg == srcReg, otherwise false +// flags -- The instruction flags +// +void emitter::emitIns_Mov(instruction ins, + emitAttr attr, + regNumber dstReg, + regNumber srcReg, + bool canSkip, + insFlags flags /* = INS_FLAGS_DONT_CARE */) +{ + assert(IsMovInstruction(ins)); + + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; + insFlags sf = INS_FLAGS_DONT_CARE;
+ + /* Figure out the encoding format of the instruction */ + switch (ins) + { + case INS_mov: + { + if (insDoesNotSetFlags(flags)) + { + if (canSkip && (dstReg == srcReg)) + { + // These instructions have no side effect and can be skipped + return; + } + fmt = IF_T1_D0; + sf = INS_FLAGS_NOT_SET; + } + else // insSetsFlags(flags) + { + sf = INS_FLAGS_SET; + if (isLowRegister(dstReg) && isLowRegister(srcReg)) + { + fmt = IF_T1_E; + } + else + { + fmt = IF_T2_C3; + } + } + break; + } + + case INS_vmov: + { + // VM debugging single stepper doesn't support PC register with this instruction. + assert(dstReg != REG_PC); + assert(srcReg != REG_PC); + + if (canSkip && (dstReg == srcReg)) + { + // These instructions have no side effect and can be skipped + return; + } + + if (size == EA_8BYTE) + { + assert(isDoubleReg(dstReg)); + assert(isDoubleReg(srcReg)); + } + else + { + assert(isFloatReg(dstReg)); + assert(isFloatReg(srcReg)); + } + + fmt = IF_T2_VFP2; + sf = INS_FLAGS_NOT_SET; + break; + } + + case INS_vmov_i2f: + { + // VM debugging single stepper doesn't support PC register with this instruction. + assert(srcReg != REG_PC); + assert(isFloatReg(dstReg)); + assert(isGeneralRegister(srcReg)); + + fmt = IF_T2_VMOVS; + sf = INS_FLAGS_NOT_SET; + break; + } + + case INS_vmov_f2i: + { + // VM debugging single stepper doesn't support PC register with this instruction. + assert(dstReg != REG_PC); + assert(isGeneralRegister(dstReg)); + assert(isFloatReg(srcReg)); + + fmt = IF_T2_VMOVS; + sf = INS_FLAGS_NOT_SET; + break; + } + + case INS_sxtb: + case INS_uxtb: + { + assert(size == EA_4BYTE); + goto EXTEND_COMMON; + } + + case INS_sxth: + case INS_uxth: + { + assert(size == EA_4BYTE); + + EXTEND_COMMON: + if (canSkip && (dstReg == srcReg)) + { + // There are scenarios such as in genCallInstruction where the sign/zero extension should be elided + return; + } + + // VM debugging single stepper doesn't support PC register with this instruction. 
+ assert(dstReg != REG_PC); + assert(srcReg != REG_PC); + assert(insDoesNotSetFlags(flags)); + + if (isLowRegister(dstReg) && isLowRegister(srcReg)) + { + fmt = IF_T1_E; + sf = INS_FLAGS_NOT_SET; + } + else + { + // Use the Thumb-2 reg,reg with rotation encoding + emitIns_R_R_I(ins, attr, dstReg, srcReg, 0, INS_FLAGS_NOT_SET); + return; + } + break; + } + + default: + { + unreached(); + } + } + + assert((fmt == IF_T1_D0) || (fmt == IF_T1_E) || (fmt == IF_T2_C3) || (fmt == IF_T2_VFP2) || (fmt == IF_T2_VMOVS)); + + assert(sf != INS_FLAGS_DONT_CARE); + + instrDesc* id = emitNewInstrSmall(attr); + insSize isz = emitInsSize(fmt); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsSize(isz); + id->idInsFlags(sf); + id->idReg1(dstReg); + id->idReg2(srcReg); + + dispIns(id); + appendToCurIG(id); +} + /***************************************************************************** * * Add an instruction referencing two registers @@ -2012,6 +2209,12 @@ void emitter::emitIns_R_R( instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insFlags flags /* = INS_FLAGS_DONT_CARE */) { + if (IsMovInstruction(ins)) + { + assert(!"Please use emitIns_Mov() to correctly handle move elision"); + emitIns_Mov(ins, attr, reg1, reg2, /* canSkip */ false, flags); + } + emitAttr size = EA_SIZE(attr); insFormat fmt = IF_NONE; insFlags sf = INS_FLAGS_DONT_CARE; @@ -2038,27 +2241,6 @@ void emitter::emitIns_R_R( emitIns_R_R_R(ins, attr, reg1, reg1, reg2, flags); return; - case INS_mov: - if (insDoesNotSetFlags(flags)) - { - assert(reg1 != reg2); - fmt = IF_T1_D0; - sf = INS_FLAGS_NOT_SET; - } - else // insSetsFlags(flags) - { - sf = INS_FLAGS_SET; - if (isLowRegister(reg1) && isLowRegister(reg2)) - { - fmt = IF_T1_E; - } - else - { - fmt = IF_T2_C3; - } - } - break; - case INS_cmp: assert(reg1 != REG_PC); // VM debugging single stepper doesn't support PC register with this instruction. 
assert(reg2 != REG_PC); @@ -2074,22 +2256,6 @@ void emitter::emitIns_R_R( } break; - case INS_vmov_f2i: - assert(reg1 != REG_PC); // VM debugging single stepper doesn't support PC register with this instruction. - assert(isGeneralRegister(reg1)); - assert(isFloatReg(reg2)); - fmt = IF_T2_VMOVS; - sf = INS_FLAGS_NOT_SET; - break; - - case INS_vmov_i2f: - assert(reg2 != REG_PC); // VM debugging single stepper doesn't support PC register with this instruction. - assert(isFloatReg(reg1)); - assert(isGeneralRegister(reg2)); - fmt = IF_T2_VMOVS; - sf = INS_FLAGS_NOT_SET; - break; - case INS_vcvt_d2i: case INS_vcvt_d2u: case INS_vcvt_d2f: @@ -2113,12 +2279,6 @@ void emitter::emitIns_R_R( assert(isFloatReg(reg2)); goto VCVT_COMMON; - case INS_vmov: - assert(reg1 != REG_PC); // VM debugging single stepper doesn't support PC register with this instruction. - assert(reg2 != REG_PC); - assert(reg1 != reg2); - FALLTHROUGH; - case INS_vabs: case INS_vsqrt: case INS_vcmp: @@ -2248,31 +2408,6 @@ void emitter::emitIns_R_R( } break; - case INS_sxtb: - case INS_uxtb: - assert(size == EA_4BYTE); - goto EXTEND_COMMON; - - case INS_sxth: - case INS_uxth: - assert(size == EA_4BYTE); - EXTEND_COMMON: - assert(reg1 != REG_PC); // VM debugging single stepper doesn't support PC register with this instruction. - assert(reg2 != REG_PC); - assert(insDoesNotSetFlags(flags)); - if (isLowRegister(reg1) && isLowRegister(reg2)) - { - fmt = IF_T1_E; - sf = INS_FLAGS_NOT_SET; - } - else - { - // Use the Thumb-2 reg,reg with rotation encoding - emitIns_R_R_I(ins, attr, reg1, reg2, 0, INS_FLAGS_NOT_SET); - return; - } - break; - case INS_tbb: assert(size == EA_4BYTE); assert(insDoesNotSetFlags(flags)); @@ -2448,10 +2583,7 @@ void emitter::emitIns_R_R_I(instruction ins, { // Is the mov even necessary? 
// Fix 383915 ARM ILGEN - if (reg1 != reg2) - { - emitIns_R_R(INS_mov, attr, reg1, reg2, flags); - } + emitIns_Mov(INS_mov, attr, reg1, reg2, /* canSkip */ true, flags); return; } // Can we encode the immediate 'imm' using a Thumb-1 encoding? @@ -2689,11 +2821,7 @@ void emitter::emitIns_R_R_I(instruction ins, if (imm == 0) { // Additional Fix 383915 ARM ILGEN - if ((reg1 != reg2) || insMustSetFlags(flags)) - { - // Use MOV/MOVS instriction - emitIns_R_R(INS_mov, attr, reg1, reg2, flags); - } + emitIns_Mov(INS_mov, attr, reg1, reg2, /* canSkip */ !insMustSetFlags(flags), flags); return; } @@ -7698,7 +7826,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR if (type == TYP_FLOAT) { regNumber tmpReg = indir->GetSingleTempReg(); - emitIns_R_R(INS_vmov_f2i, EA_4BYTE, tmpReg, dataReg); + emitIns_Mov(INS_vmov_f2i, EA_4BYTE, tmpReg, dataReg, /* canSkip */ false); emitInsLoadStoreOp(INS_str, EA_4BYTE, tmpReg, indir, 0); return; } @@ -7719,7 +7847,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR { regNumber tmpReg = indir->GetSingleTempReg(); emitInsLoadStoreOp(INS_ldr, EA_4BYTE, tmpReg, indir, 0); - emitIns_R_R(INS_vmov_i2f, EA_4BYTE, dataReg, tmpReg); + emitIns_Mov(INS_vmov_i2f, EA_4BYTE, dataReg, tmpReg, /* canSkip */ false); return; } else if (type == TYP_DOUBLE) diff --git a/src/coreclr/jit/emitarm.h b/src/coreclr/jit/emitarm.h index 27e5e9b1f6b7a..ac6e4f139db7f 100644 --- a/src/coreclr/jit/emitarm.h +++ b/src/coreclr/jit/emitarm.h @@ -92,6 +92,7 @@ emitter::code_t emitInsCode(instruction ins, insFormat fmt); void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir); void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir, int offset); +static bool IsMovInstruction(instruction ins); static bool isModImmConst(int imm); static int encodeModImmConst(int imm); @@ -223,6 +224,13 @@ void emitIns_R_I( instruction ins, emitAttr 
attr, regNumber reg, target_ssize_t imm, insFlags flags = INS_FLAGS_DONT_CARE); void emitIns_MovRelocatableImmediate(instruction ins, emitAttr attr, regNumber reg, BYTE* addr); +void emitIns_Mov(instruction ins, + emitAttr attr, + regNumber dstReg, + regNumber srgReg, + bool canSkip, + insFlags flags = INS_FLAGS_DONT_CARE); + void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insFlags flags = INS_FLAGS_DONT_CARE); void emitIns_R_I_I( diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 3badba90a0c03..37e19a219d265 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -4059,14 +4059,22 @@ void emitter::emitIns_R_F( appendToCurIG(id); } -/***************************************************************************** - * - * Add an instruction referencing two registers - */ - -void emitter::emitIns_R_R( - instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt /* = INS_OPTS_NONE */) +//------------------------------------------------------------------------ +// emitIns_Mov: Emits a move instruction +// +// Arguments: +// ins -- The instruction being emitted +// attr -- The emit attribute +// dstReg -- The destination register +// srcReg -- The source register +// canSkip -- true if the move can be elided when dstReg == srcReg, otherwise false +// insOpts -- The instruction options +// +void emitter::emitIns_Mov( + instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt /* = INS_OPTS_NONE */) { + assert(IsMovInstruction(ins)); + emitAttr size = EA_SIZE(attr); emitAttr elemsize = EA_UNKNOWN; insFormat fmt = IF_NONE; @@ -4075,53 +4083,170 @@ void emitter::emitIns_R_R( switch (ins) { case INS_mov: + { assert(insOptsNone(opt)); - // Is the mov even necessary? 
- if (emitComp->opts.OptimizationEnabled() && IsRedundantMov(ins, size, reg1, reg2)) + if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip)) { + // These instructions have no side effect and can be skipped return; } // Check for the 'mov' aliases for the vector registers - if (isVectorRegister(reg1)) + if (isVectorRegister(dstReg)) { - if (isVectorRegister(reg2) && isValidVectorDatasize(size)) + if (isVectorRegister(srcReg) && isValidVectorDatasize(size)) { - return emitIns_R_R_R(INS_mov, size, reg1, reg2, reg2); + return emitIns_R_R_R(INS_mov, size, dstReg, srcReg, srcReg); } else { - return emitIns_R_R_I(INS_mov, size, reg1, reg2, 0); + return emitIns_R_R_I(INS_mov, size, dstReg, srcReg, 0); } } else { - if (isVectorRegister(reg2)) + if (isVectorRegister(srcReg)) { - assert(isGeneralRegister(reg1)); - return emitIns_R_R_I(INS_mov, size, reg1, reg2, 0); + assert(isGeneralRegister(dstReg)); + return emitIns_R_R_I(INS_mov, size, dstReg, srcReg, 0); } } // Is this a MOV to/from SP instruction? 
- if ((reg1 == REG_SP) || (reg2 == REG_SP)) + if ((dstReg == REG_SP) || (srcReg == REG_SP)) { - assert(isGeneralRegisterOrSP(reg1)); - assert(isGeneralRegisterOrSP(reg2)); - reg1 = encodingSPtoZR(reg1); - reg2 = encodingSPtoZR(reg2); - fmt = IF_DR_2G; + assert(isGeneralRegisterOrSP(dstReg)); + assert(isGeneralRegisterOrSP(srcReg)); + dstReg = encodingSPtoZR(dstReg); + srcReg = encodingSPtoZR(srcReg); + fmt = IF_DR_2G; } else { assert(insOptsNone(opt)); - assert(isGeneralRegister(reg1)); - assert(isGeneralRegisterOrZR(reg2)); + assert(isGeneralRegister(dstReg)); + assert(isGeneralRegisterOrZR(srcReg)); fmt = IF_DR_2E; } break; + } + + case INS_sxtw: + { + assert(size == EA_8BYTE); + FALLTHROUGH; + } + + case INS_sxtb: + case INS_sxth: + case INS_uxtb: + case INS_uxth: + { + if (canSkip && (dstReg == srcReg)) + { + // There are scenarios such as in genCallInstruction where the sign/zero extension should be elided + return; + } + + assert(insOptsNone(opt)); + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegister(dstReg)); + assert(isGeneralRegister(srcReg)); + fmt = IF_DR_2H; + break; + } + + case INS_fmov: + { + assert(isValidVectorElemsizeFloat(size)); + + if (canSkip && (dstReg == srcReg)) + { + // These instructions have no side effect and can be skipped + return; + } + + if (isVectorRegister(dstReg)) + { + if (isVectorRegister(srcReg)) + { + assert(insOptsNone(opt)); + fmt = IF_DV_2G; + } + else + { + assert(isGeneralRegister(srcReg)); + + // if the optional conversion specifier is not present we calculate it + if (opt == INS_OPTS_NONE) + { + opt = (size == EA_4BYTE) ? INS_OPTS_4BYTE_TO_S : INS_OPTS_8BYTE_TO_D; + } + assert(insOptsConvertIntToFloat(opt)); + + fmt = IF_DV_2I; + } + } + else + { + assert(isGeneralRegister(dstReg)); + assert(isVectorRegister(srcReg)); + + // if the optional conversion specifier is not present we calculate it + if (opt == INS_OPTS_NONE) + { + opt = (size == EA_4BYTE) ? 
INS_OPTS_S_TO_4BYTE : INS_OPTS_D_TO_8BYTE; + } + assert(insOptsConvertFloatToInt(opt)); + + fmt = IF_DV_2H; + } + break; + } + + default: + { + unreached(); + } + } + + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstrSmall(attr); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(opt); + + id->idReg1(dstReg); + id->idReg2(srcReg); + + dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing two registers + */ +void emitter::emitIns_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt /* = INS_OPTS_NONE */) +{ + if (IsMovInstruction(ins)) + { + assert(!"Please use emitIns_Mov() to correctly handle move elision"); + emitIns_Mov(ins, attr, reg1, reg2, /* canSkip */ false, opt); + } + + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; + + /* Figure out the encoding format of the instruction */ + switch (ins) + { case INS_dup: // Vector operation assert(insOptsAnyArrangement(opt)); @@ -4196,21 +4321,6 @@ void emitter::emitIns_R_R( fmt = IF_DR_2E; break; - case INS_sxtw: - assert(size == EA_8BYTE); - FALLTHROUGH; - - case INS_sxtb: - case INS_sxth: - case INS_uxtb: - case INS_uxth: - assert(insOptsNone(opt)); - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegister(reg1)); - assert(isGeneralRegister(reg2)); - fmt = IF_DR_2H; - break; - case INS_sxtl: case INS_sxtl2: case INS_uxtl: @@ -4391,52 +4501,6 @@ void emitter::emitIns_R_R( emitIns_R_R_R(INS_ldaddl, attr, reg1, REG_ZR, reg2); return; - case INS_fmov: - assert(isValidVectorElemsizeFloat(size)); - - // Is the mov even necessary? 
- if (reg1 == reg2) - { - return; - } - - if (isVectorRegister(reg1)) - { - if (isVectorRegister(reg2)) - { - assert(insOptsNone(opt)); - fmt = IF_DV_2G; - } - else - { - assert(isGeneralRegister(reg2)); - - // if the optional conversion specifier is not present we calculate it - if (opt == INS_OPTS_NONE) - { - opt = (size == EA_4BYTE) ? INS_OPTS_4BYTE_TO_S : INS_OPTS_8BYTE_TO_D; - } - assert(insOptsConvertIntToFloat(opt)); - - fmt = IF_DV_2I; - } - } - else - { - assert(isGeneralRegister(reg1)); - assert(isVectorRegister(reg2)); - - // if the optional conversion specifier is not present we calculate it - if (opt == INS_OPTS_NONE) - { - opt = (size == EA_4BYTE) ? INS_OPTS_S_TO_4BYTE : INS_OPTS_D_TO_8BYTE; - } - assert(insOptsConvertFloatToInt(opt)); - - fmt = IF_DV_2H; - } - break; - case INS_fcmp: case INS_fcmpe: assert(insOptsNone(opt)); @@ -5580,11 +5644,7 @@ void emitter::emitIns_R_R_I( // Is it just a mov? if (imm == 0) { - // Is the mov even necessary? - if (reg1 != reg2) - { - emitIns_R_R(INS_mov, attr, reg1, reg2); - } + emitIns_Mov(INS_mov, attr, reg1, reg2, /* canSkip */ true); return; } @@ -15463,6 +15523,34 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins #endif // defined(DEBUG) || defined(LATE_DISASM) +//------------------------------------------------------------------------ +// IsMovInstruction: Determines whether a given instruction is a move instruction +// +// Arguments: +// ins -- The instruction being checked +// +bool emitter::IsMovInstruction(instruction ins) +{ + switch (ins) + { + case INS_fmov: + case INS_mov: + case INS_sxtb: + case INS_sxth: + case INS_sxtw: + case INS_uxtb: + case INS_uxth: + { + return true; + } + + default: + { + return false; + } + } +} + //---------------------------------------------------------------------------------------- // IsRedundantMov: // Check if the current `mov` instruction is redundant and can be omitted.
@@ -15488,14 +15576,27 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins // size - Operand size of current instruction // dst - The current destination // src - The current source +// canSkip - The move can be skipped as it doesn't represent special semantics // // Return Value: // true if previous instruction moved from current dst to src. -bool emitter::IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src) +bool emitter::IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip) { assert(ins == INS_mov); + if (canSkip && (dst == src)) + { + // These elisions used to be explicit even when optimizations were disabled + return true; + } + + if (!emitComp->opts.OptimizationEnabled()) + { + // The remaining move elisions should only happen if optimizations are enabled + return false; + } + if (dst == src) { // A mov with a EA_4BYTE has the side-effect of clearing the upper bits diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 841f10bf297fb..b25659368c1d1 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -117,7 +117,8 @@ static UINT64 Replicate_helper(UINT64 value, unsigned width, emitAttr size); // Method to do check if mov is redundant with respect to the last instruction. // If yes, the caller of this method can choose to omit current mov instruction. 
-bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src); +static bool IsMovInstruction(instruction ins); +bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip); bool IsRedundantLdStr(instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt); /************************************************************************ @@ -731,6 +732,9 @@ void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, ins void emitIns_R_F(instruction ins, emitAttr attr, regNumber reg, double immDbl, insOpts opt = INS_OPTS_NONE); +void emitIns_Mov( + instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt = INS_OPTS_NONE); + void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt = INS_OPTS_NONE); void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insFlags flags) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 516c976fe009a..5fd16415d4f9c 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -4153,6 +4153,192 @@ void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fld emitAdjustStackDepthPushPop(ins); } +//------------------------------------------------------------------------ +// IsMovInstruction: Determines whether a given instruction is a move instruction +// +// Arguments: +// ins -- The instruction being checked +// +bool emitter::IsMovInstruction(instruction ins) +{ + switch (ins) + { + case INS_mov: + case INS_movapd: + case INS_movaps: + case INS_movd: + case INS_movdqa: + case INS_movdqu: + case INS_movsd: + case INS_movsdsse2: + case INS_movss: + case INS_movsx: + case INS_movupd: + case INS_movups: + case INS_movzx: + { + return true; + } + +#if defined(TARGET_AMD64) + case INS_movq: + case INS_movsxd: + { + return true; + } +#endif // TARGET_AMD64 + + default: + { + return
false; + } + } +} + +//------------------------------------------------------------------------ +// emitIns_Mov: Emits a move instruction +// +// Arguments: +// ins -- The instruction being emitted +// attr -- The emit attribute +// dstReg -- The destination register +// srcReg -- The source register +// canSkip -- true if the move can be elided when dstReg == srcReg, otherwise false +// +void emitter::emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip) +{ + // Only move instructions can use emitIns_Mov + assert(IsMovInstruction(ins)); + +#if DEBUG + switch (ins) + { + case INS_mov: + case INS_movsx: + case INS_movzx: + { + assert(isGeneralRegister(dstReg) && isGeneralRegister(srcReg)); + break; + } + + case INS_movapd: + case INS_movaps: + case INS_movdqa: + case INS_movdqu: + case INS_movsd: + case INS_movsdsse2: + case INS_movss: + case INS_movupd: + case INS_movups: + { + assert(isFloatReg(dstReg) && isFloatReg(srcReg)); + break; + } + + case INS_movd: + { + assert(isFloatReg(dstReg) != isFloatReg(srcReg)); + break; + } + +#if defined(TARGET_AMD64) + case INS_movq: + { + assert(isFloatReg(dstReg) && isFloatReg(srcReg)); + break; + } + + case INS_movsxd: + { + assert(isGeneralRegister(dstReg) && isGeneralRegister(srcReg)); + break; + } +#endif // TARGET_AMD64 + + default: + { + unreached(); + } + } +#endif + + emitAttr size = EA_SIZE(attr); + + assert(size <= EA_32BYTE); + noway_assert(emitVerifyEncodable(ins, size, dstReg, srcReg)); + + if (canSkip && (dstReg == srcReg)) + { + switch (ins) + { + case INS_mov: + { + // These instructions have no side effect and can be skipped + return; + } + + case INS_movapd: + case INS_movaps: + case INS_movdqa: + case INS_movdqu: + case INS_movupd: + case INS_movups: + { + // These instructions have no side effect and can be skipped + return; + } + + case INS_movd: + case INS_movsd: + case INS_movsdsse2: + case INS_movss: + case INS_movsx: + case INS_movzx: + { + // These instructions 
have a side effect and shouldn't be skipped + // however existing codepaths were skipping these instructions in + // certain scenarios and so we skip them as well for back-compat. + // + // Long term, these paths should be audited and should likely be + // replaced with copies rather than extensions. + return; + } + +#if defined(TARGET_AMD64) + case INS_movq: + case INS_movsxd: + { + // These instructions have a side effect and shouldn't be skipped + // however existing codepaths were skipping these instructions in + // certain scenarios and so we skip them as well for back-compat. + // + // Long term, these paths should be audited and should likely be + // replaced with copies rather than extensions. + return; + } +#endif // TARGET_AMD64 + + default: + { + unreached(); + } + } + } + + UNATIVE_OFFSET sz = emitInsSizeRR(ins, dstReg, srcReg, attr); + insFormat fmt = emitInsModeFormat(ins, IF_RRD_RRD); + + instrDesc* id = emitNewInstrSmall(attr); + id->idIns(ins); + id->idInsFmt(fmt); + id->idReg1(dstReg); + id->idReg2(srcReg); + id->idCodeSize(sz); + + dispIns(id); + emitCurIGsize += sz; +} + /***************************************************************************** * * Add an instruction with two register operands. @@ -4160,18 +4346,13 @@ void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fld void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2) { - emitAttr size = EA_SIZE(attr); - - /* We don't want to generate any useless mov instructions! */ - CLANG_FORMAT_COMMENT_ANCHOR; + if (IsMovInstruction(ins)) + { + assert(!"Please use emitIns_Mov() to correctly handle move elision"); + emitIns_Mov(ins, attr, reg1, reg2, /* canSkip */ false); + } -#ifdef TARGET_AMD64 - // Same-reg 4-byte mov can be useful because it performs a - // zero-extension to 8 bytes. 
- assert(ins != INS_mov || reg1 != reg2 || size == EA_4BYTE); -#else - assert(ins != INS_mov || reg1 != reg2); -#endif // TARGET_AMD64 + emitAttr size = EA_SIZE(attr); assert(size <= EA_32BYTE); noway_assert(emitVerifyEncodable(ins, size, reg1, reg2)); @@ -5833,10 +6014,7 @@ void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber targe } else { - if (op1Reg != targetReg) - { - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_I(ins, attr, targetReg, ival); } } @@ -5861,10 +6039,7 @@ void emitter::emitIns_SIMD_R_R_A( } else { - if (op1Reg != targetReg) - { - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_A(ins, attr, targetReg, indir); } } @@ -5890,10 +6065,7 @@ void emitter::emitIns_SIMD_R_R_AR( } else { - if (op1Reg != targetReg) - { - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_AR(ins, attr, targetReg, base, offset); } } @@ -5919,10 +6091,7 @@ void emitter::emitIns_SIMD_R_R_C( } else { - if (op1Reg != targetReg) - { - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_C(ins, attr, targetReg, fldHnd, offs); } } @@ -5947,14 +6116,19 @@ void emitter::emitIns_SIMD_R_R_R( } else { - if (op1Reg != targetReg) - { - // Ensure we aren't overwriting op2 - assert(op2Reg != targetReg); + // Ensure we aren't overwriting op2 + assert((op2Reg != targetReg) || (op1Reg == targetReg)); + + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); + if (IsMovInstruction(ins)) + { + emitIns_Mov(ins, attr, targetReg, op2Reg, /* canSkip */ false); + } + else + { + emitIns_R_R(ins, attr, targetReg, op2Reg); } - emitIns_R_R(ins, attr, targetReg, op2Reg); } } 
@@ -5979,10 +6153,7 @@ void emitter::emitIns_SIMD_R_R_S( } else { - if (op1Reg != targetReg) - { - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_S(ins, attr, targetReg, varx, offs); } } @@ -6009,10 +6180,7 @@ void emitter::emitIns_SIMD_R_R_A_I( } else { - if (op1Reg != targetReg) - { - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_A_I(ins, attr, targetReg, indir, ival); } } @@ -6038,10 +6206,7 @@ void emitter::emitIns_SIMD_R_R_AR_I( } else { - if (op1Reg != targetReg) - { - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_AR_I(ins, attr, targetReg, base, 0, ival); } } @@ -6073,10 +6238,7 @@ void emitter::emitIns_SIMD_R_R_C_I(instruction ins, } else { - if (op1Reg != targetReg) - { - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_C_I(ins, attr, targetReg, fldHnd, offs, ival); } } @@ -6102,13 +6264,10 @@ void emitter::emitIns_SIMD_R_R_R_I( } else { - if (op1Reg != targetReg) - { - // Ensure we aren't overwriting op2 - assert(op2Reg != targetReg); + // Ensure we aren't overwriting op2 + assert((op2Reg != targetReg) || (op1Reg == targetReg)); - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_R_I(ins, attr, targetReg, op2Reg, ival); } } @@ -6135,10 +6294,7 @@ void emitter::emitIns_SIMD_R_R_S_I( } else { - if (op1Reg != targetReg) - { - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_S_I(ins, attr, targetReg, varx, offs, ival); } } @@ -6161,14 +6317,10 @@ void emitter::emitIns_SIMD_R_R_R_A( assert(IsFMAInstruction(ins)); assert(UseVEXEncoding()); - 
if (op1Reg != targetReg) - { - // Ensure we aren't overwriting op2 - assert(op2Reg != targetReg); - - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + // Ensure we aren't overwriting op2 + assert((op2Reg != targetReg) || (op1Reg == targetReg)); + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_R_A(ins, attr, targetReg, op2Reg, indir); } @@ -6190,14 +6342,10 @@ void emitter::emitIns_SIMD_R_R_R_AR( assert(IsFMAInstruction(ins)); assert(UseVEXEncoding()); - if (op1Reg != targetReg) - { - // Ensure we aren't overwriting op2 - assert(op2Reg != targetReg); - - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + // Ensure we aren't overwriting op2 + assert((op2Reg != targetReg) || (op1Reg == targetReg)); + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_R_AR(ins, attr, targetReg, op2Reg, base, 0); } @@ -6225,14 +6373,10 @@ void emitter::emitIns_SIMD_R_R_R_C(instruction ins, assert(IsFMAInstruction(ins)); assert(UseVEXEncoding()); - if (op1Reg != targetReg) - { - // Ensure we aren't overwriting op2 - assert(op2Reg != targetReg); - - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + // Ensure we aren't overwriting op2 + assert((op2Reg != targetReg) || (op1Reg == targetReg)); + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_R_C(ins, attr, targetReg, op2Reg, fldHnd, offs); } @@ -6255,16 +6399,11 @@ void emitter::emitIns_SIMD_R_R_R_R( { assert(UseVEXEncoding()); - if (op1Reg != targetReg) - { - // Ensure we aren't overwriting op2 or op3 - - assert(op2Reg != targetReg); - assert(op3Reg != targetReg); - - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + // Ensure we aren't overwriting op2 or op3 + assert((op2Reg != targetReg) || (op1Reg == targetReg)); + assert((op3Reg != targetReg) || (op1Reg == targetReg)); + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_R_R(ins, attr, targetReg, op2Reg, op3Reg); } else if (UseVEXEncoding()) 
@@ -6291,23 +6430,19 @@ void emitter::emitIns_SIMD_R_R_R_R( else { assert(isSse41Blendv(ins)); + + // Ensure we aren't overwriting op1 or op2 + assert((op1Reg != REG_XMM0) || (op3Reg == REG_XMM0)); + assert((op2Reg != REG_XMM0) || (op3Reg == REG_XMM0)); + // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0 - if (op3Reg != REG_XMM0) - { - // Ensure we aren't overwriting op1 or op2 - assert(op1Reg != REG_XMM0); - assert(op2Reg != REG_XMM0); + emitIns_Mov(INS_movaps, attr, REG_XMM0, op3Reg, /* canSkip */ true); - emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg); - } - if (op1Reg != targetReg) - { - // Ensure we aren't overwriting op2 or oop3 (which should be REG_XMM0) - assert(op2Reg != targetReg); - assert(targetReg != REG_XMM0); + // Ensure we aren't overwriting op2 or op3 (which should be REG_XMM0) + assert((op2Reg != targetReg) || (op1Reg == targetReg)); + assert(targetReg != REG_XMM0); - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_R(ins, attr, targetReg, op2Reg); } } @@ -6331,14 +6466,10 @@ void emitter::emitIns_SIMD_R_R_R_S( assert(IsFMAInstruction(ins)); assert(UseVEXEncoding()); - if (op1Reg != targetReg) - { - // Ensure we aren't overwriting op2 - assert(op2Reg != targetReg); - - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + // Ensure we aren't overwriting op2 + assert((op2Reg != targetReg) || (op1Reg == targetReg)); + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_R_S(ins, attr, targetReg, op2Reg, varx, offs); } @@ -6394,22 +6525,16 @@ void emitter::emitIns_SIMD_R_R_A_R( { assert(isSse41Blendv(ins)); - // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0 - if (op3Reg != REG_XMM0) - { - // Ensure we aren't overwriting op1 - assert(op1Reg != REG_XMM0); + // Ensure we aren't overwriting op1 + assert(op1Reg != REG_XMM0); - emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg); - } - if (op1Reg != targetReg) - { - // Ensure we
aren't overwriting op3 (which should be REG_XMM0) - assert(targetReg != REG_XMM0); + // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0 + emitIns_Mov(INS_movaps, attr, REG_XMM0, op3Reg, /* canSkip */ true); - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + // Ensure we aren't overwriting op3 (which should be REG_XMM0) + assert(targetReg != REG_XMM0); + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_A(ins, attr, targetReg, indir); } } @@ -6466,22 +6591,16 @@ void emitter::emitIns_SIMD_R_R_AR_R( { assert(isSse41Blendv(ins)); + // Ensure we aren't overwriting op1 + assert(op1Reg != REG_XMM0); + // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0 - if (op3Reg != REG_XMM0) - { - // Ensure we aren't overwriting op1 - assert(op1Reg != REG_XMM0); + emitIns_Mov(INS_movaps, attr, REG_XMM0, op3Reg, /* canSkip */ true); - emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg); - } - if (op1Reg != targetReg) - { - // Ensure we aren't overwriting op3 (which should be REG_XMM0) - assert(targetReg != REG_XMM0); - - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + // Ensure we aren't overwriting op3 (which should be REG_XMM0) + assert(targetReg != REG_XMM0); + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_AR(ins, attr, targetReg, base, 0); } } @@ -6544,22 +6663,16 @@ void emitter::emitIns_SIMD_R_R_C_R(instruction ins, { assert(isSse41Blendv(ins)); - // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0 - if (op3Reg != REG_XMM0) - { - // Ensure we aren't overwriting op1 - assert(op1Reg != REG_XMM0); + // Ensure we aren't overwriting op1 + assert(op1Reg != REG_XMM0); - emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg); - } - if (op1Reg != targetReg) - { - // Ensure we aren't overwriting op3 (which should be REG_XMM0) - assert(targetReg != REG_XMM0); + // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0 + emitIns_Mov(INS_movaps, attr, REG_XMM0, op3Reg, /* canSkip */ true); - 
emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + // Ensure we aren't overwriting op3 (which should be REG_XMM0) + assert(targetReg != REG_XMM0); + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_C(ins, attr, targetReg, fldHnd, offs); } } @@ -6617,22 +6730,16 @@ void emitter::emitIns_SIMD_R_R_S_R( { assert(isSse41Blendv(ins)); - // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0 - if (op3Reg != REG_XMM0) - { - // Ensure we aren't overwriting op1 - assert(op1Reg != REG_XMM0); + // Ensure we aren't overwriting op1 + assert(op1Reg != REG_XMM0); - emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg); - } - if (op1Reg != targetReg) - { - // Ensure we aren't overwriting op3 (which should be REG_XMM0) - assert(targetReg != REG_XMM0); + // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0 + emitIns_Mov(INS_movaps, attr, REG_XMM0, op3Reg, /* canSkip */ true); - emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + // Ensure we aren't overwriting op3 (which should be REG_XMM0) + assert(targetReg != REG_XMM0); + emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_S(ins, attr, targetReg, varx, offs); } } diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index af8dffc301208..d395a29ec9b13 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -94,6 +94,7 @@ code_t AddRexPrefix(instruction ins, code_t code); bool EncodedBySSE38orSSE3A(instruction ins); bool Is4ByteSSEInstruction(instruction ins); +static bool IsMovInstruction(instruction ins); bool AreUpper32BitsZero(regNumber reg); @@ -310,6 +311,8 @@ void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t val); +void emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regNumber srgReg, bool canSkip); + void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2); void 
emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival); diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index ae28773781684..8ae7457dbc200 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -280,10 +280,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(targetReg != op2Reg); assert(targetReg != op3Reg); - if (targetReg != op1Reg) - { - GetEmitter()->emitIns_R_R(INS_mov, emitTypeSize(node), targetReg, op1Reg); - } + GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true); HWIntrinsicImmOpHelper helper(this, intrin.op4, node); @@ -313,10 +310,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(targetReg != op2Reg); assert(targetReg != op3Reg); - if (targetReg != op1Reg) - { - GetEmitter()->emitIns_R_R(INS_mov, emitTypeSize(node), targetReg, op1Reg); - } + GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true); GetEmitter()->emitIns_R_R_R_I(ins, emitSize, targetReg, op2Reg, op3Reg, 0, opt); } @@ -333,10 +327,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) if (isRMW) { - if (targetReg != op1Reg) - { - GetEmitter()->emitIns_R_R(INS_mov, emitTypeSize(node), targetReg, op1Reg); - } + GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true); HWIntrinsicImmOpHelper helper(this, intrin.op3, node); @@ -374,10 +365,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { assert(targetReg != op2Reg); - if (targetReg != op1Reg) - { - GetEmitter()->emitIns_R_R(INS_mov, emitTypeSize(node), targetReg, op1Reg); - } + GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true); GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op2Reg, opt); } else @@ -391,11 +379,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(targetReg != 
op2Reg); assert(targetReg != op3Reg); - if (targetReg != op1Reg) - { - GetEmitter()->emitIns_R_R(INS_mov, emitTypeSize(node), targetReg, op1Reg); - } - + GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true); GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt); break; @@ -487,7 +471,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else { - GetEmitter()->emitIns_R_R(INS_mov, emitSize, targetReg, op1Reg); + GetEmitter()->emitIns_Mov(INS_mov, emitSize, targetReg, op1Reg, /* canSkip */ false); GetEmitter()->emitIns_R_R_R(INS_bsl, emitSize, targetReg, op2Reg, op3Reg, opt); } break; @@ -585,10 +569,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AdvSimd_Insert: assert(isRMW); - if (targetReg != op1Reg) - { - GetEmitter()->emitIns_R_R(INS_mov, emitTypeSize(node), targetReg, op1Reg); - } + GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true); if (intrin.op3->isContainedFltOrDblImmed()) { @@ -630,10 +611,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(isRMW); assert(targetReg != op3Reg); - if (targetReg != op1Reg) - { - GetEmitter()->emitIns_R_R(INS_mov, emitTypeSize(node), targetReg, op1Reg); - } + GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true); HWIntrinsicImmOpHelper helper(this, intrin.op2, node); @@ -651,10 +629,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(isRMW); assert(targetReg != op3Reg); - if (targetReg != op1Reg) - { - GetEmitter()->emitIns_R_R(INS_mov, emitTypeSize(node), targetReg, op1Reg); - } + GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true); const int resultIndex = (int)intrin.op2->AsIntCon()->gtIconVal; const int valueIndex = (int)intrin.op4->AsIntCon()->gtIconVal; @@ -667,10 +642,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(isRMW); assert(targetReg != op3Reg); - if 
(targetReg != op1Reg) - { - GetEmitter()->emitIns_R_R(INS_mov, emitTypeSize(node), targetReg, op1Reg); - } + GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true); HWIntrinsicImmOpHelper helper(this, intrin.op2, node); @@ -721,7 +693,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) else if (varTypeIsFloating(intrin.baseType)) { // fmov reg1, reg2 - GetEmitter()->emitIns_R_R(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, INS_OPTS_NONE); + assert(GetEmitter()->IsMovInstruction(ins)); + GetEmitter()->emitIns_Mov(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, + /* canSkip */ false, INS_OPTS_NONE); } else { @@ -752,11 +726,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { ins = varTypeIsUnsigned(intrin.baseType) ? INS_usqadd : INS_suqadd; - if (targetReg != op1Reg) - { - GetEmitter()->emitIns_R_R(INS_mov, emitTypeSize(node), targetReg, op1Reg); - } - + GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true); GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op2Reg, opt); } else @@ -791,7 +761,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) else if (intrin.id == NI_AdvSimd_Arm64_DuplicateToVector64) { assert(intrin.baseType == TYP_DOUBLE); - GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt); + assert(GetEmitter()->IsMovInstruction(ins)); + GetEmitter()->emitIns_Mov(ins, emitSize, targetReg, op1Reg, /* canSkip */ false, opt); } else { @@ -803,6 +774,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) const ssize_t dataValue = intrin.op1->AsIntCon()->gtIconVal; GetEmitter()->emitIns_R_I(INS_movi, emitSize, targetReg, dataValue, opt); } + else if (GetEmitter()->IsMovInstruction(ins)) + { + GetEmitter()->emitIns_Mov(ins, emitSize, targetReg, op1Reg, /* canSkip */ false, opt); + } else { GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt); @@ -811,15 +786,12 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* 
node) break; case NI_Vector64_ToVector128: - GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg); + GetEmitter()->emitIns_Mov(ins, emitSize, targetReg, op1Reg, /* canSkip */ false); break; case NI_Vector64_ToVector128Unsafe: case NI_Vector128_GetLower: - if (op1Reg != targetReg) - { - GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg); - } + GetEmitter()->emitIns_Mov(ins, emitSize, targetReg, op1Reg, /* canSkip */ true); break; case NI_Vector64_GetElement: diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 42711acaed83a..9680e91fde384 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -315,10 +315,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(targetReg == REG_NA); // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI - if (op3Reg != REG_EDI) - { - emit->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_EDI, op3Reg); - } + emit->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_EDI, op3Reg, /* canSkip */ true); + emit->emitIns_R_R(ins, simdSize, op1Reg, op2Reg); } } @@ -531,6 +529,10 @@ void CodeGen::genHWIntrinsic_R_RM( emit->emitIns_R_S(ins, attr, reg, varNum, offset); } + else if (emit->IsMovInstruction(ins)) + { + emit->emitIns_Mov(ins, attr, reg, rmOp->GetRegNum(), /* canSkip */ false); + } else { emit->emitIns_R_R(ins, attr, reg, rmOp->GetRegNum()); @@ -1173,10 +1175,10 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) { genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1); } - else if (targetReg != op1Reg) + else { // Just use movaps for reg->reg moves as it has zero-latency on modern CPUs - emit->emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); + emit->emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); } } break; @@ -1193,10 +1195,10 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) { genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1); } - else if (targetReg != op1Reg) + else { // 
Just use movaps for reg->reg moves as it has zero-latency on modern CPUs - emit->emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); + emit->emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); } break; } @@ -1216,7 +1218,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) else { // Just use movaps for reg->reg moves as it has zero-latency on modern CPUs - emit->emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); + emit->emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ false); } break; } @@ -1228,10 +1230,10 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) { genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1); } - else if (targetReg != op1Reg) + else { // Just use movaps for reg->reg moves as it has zero-latency on modern CPUs - emit->emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); + emit->emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); } break; } @@ -1441,19 +1443,21 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) case NI_SSE2_X64_ConvertToUInt64: { assert(op2 == nullptr); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + emitAttr attr; if (varTypeIsIntegral(baseType)) { assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG); - op1Reg = op1->GetRegNum(); - emit->emitIns_R_R(ins, emitActualTypeSize(baseType), targetReg, op1Reg); + attr = emitActualTypeSize(baseType); } else { assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT); - genHWIntrinsic_R_RM(node, ins, emitTypeSize(targetType), targetReg, op1); + attr = emitTypeSize(targetType); } + + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1); break; } @@ -1551,7 +1555,7 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node) { // extract instructions return to GP-registers, so it needs int size as the emitsize inst_RV_TT_IV(ins, emitTypeSize(TYP_INT), tmpTargetReg, op1, i); - 
emit->emitIns_R_R(INS_movd, EA_4BYTE, targetReg, tmpTargetReg); + emit->emitIns_Mov(INS_movd, EA_4BYTE, targetReg, tmpTargetReg, /* canSkip */ false); } else { @@ -1614,11 +1618,8 @@ void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node) case NI_SSE42_Crc32: case NI_SSE42_X64_Crc32: { - if (op1Reg != targetReg) - { - assert(op2->GetRegNum() != targetReg); - emit->emitIns_R_R(INS_mov, emitTypeSize(targetType), targetReg, op1Reg); - } + assert((op2->GetRegNum() != targetReg) || (op1Reg == targetReg)); + emit->emitIns_Mov(INS_mov, emitTypeSize(targetType), targetReg, op1Reg, /* canSkip */ true); if ((baseType == TYP_UBYTE) || (baseType == TYP_USHORT)) // baseType is the type of the second argument { @@ -1677,7 +1678,7 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node) assert(numArgs == 1); assert((baseType == TYP_INT) || (baseType == TYP_UINT)); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - emit->emitIns_R_R(ins, emitActualTypeSize(baseType), targetReg, op1Reg); + emit->emitIns_Mov(ins, emitActualTypeSize(baseType), targetReg, op1Reg, /* canSkip */ false); break; } @@ -1742,13 +1743,10 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node) // copy op4Reg into the tmp mask register, // the mask register will be cleared by gather instructions - emit->emitIns_R_R(INS_movaps, attr, maskReg, op4Reg); + emit->emitIns_Mov(INS_movaps, attr, maskReg, op4Reg, /* canSkip */ false); - if (targetReg != op1Reg) - { - // copy source vector to the target register for masking merge - emit->emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); - } + // copy source vector to the target register for masking merge + emit->emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); } else { @@ -1929,12 +1927,10 @@ void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node) // These do not support containment assert(!op2->isContained()); emitAttr attr = emitTypeSize(targetType); + // mov the first operand into implicit source 
operand EDX/RDX - if (op1Reg != REG_EDX) - { - assert(op2Reg != REG_EDX); - emit->emitIns_R_R(INS_mov, attr, REG_EDX, op1Reg); - } + assert((op2Reg != REG_EDX) || (op1Reg == REG_EDX)); + emit->emitIns_Mov(INS_mov, attr, REG_EDX, op1Reg, /* canSkip */ true); // generate code for MULX genHWIntrinsic_R_R_RM(node, ins, attr, targetReg, lowReg, op2); diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index fbe7e754707fc..9ce10458fb08e 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -394,6 +394,57 @@ void CodeGen::inst_RV(instruction ins, regNumber reg, var_types type, emitAttr s GetEmitter()->emitIns_R(ins, size, reg); } +/***************************************************************************** + * + * Generate a "mov reg1, reg2" instruction. + */ +void CodeGen::inst_Mov(var_types dstType, + regNumber dstReg, + regNumber srcReg, + bool canSkip, + emitAttr size, + insFlags flags /* = INS_FLAGS_DONT_CARE */) +{ + instruction ins = ins_Copy(srcReg, dstType); + + if (size == EA_UNKNOWN) + { + size = emitActualTypeSize(dstType); + } + +#ifdef TARGET_ARM + GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip, flags); +#else + GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip); +#endif +} + +/***************************************************************************** + * + * Generate a "mov reg1, reg2" instruction whose opcode is chosen by ins_Move_Extend(srcType, srcInReg).
+ */ +void CodeGen::inst_Mov_Extend(var_types srcType, + bool srcInReg, + regNumber dstReg, + regNumber srcReg, + bool canSkip, + emitAttr size, + insFlags flags /* = INS_FLAGS_DONT_CARE */) +{ + instruction ins = ins_Move_Extend(srcType, srcInReg); + + if (size == EA_UNKNOWN) + { + size = emitActualTypeSize(srcType); + } + +#ifdef TARGET_ARM + GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip, flags); +#else + GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip); +#endif +} + /***************************************************************************** * * Generate a "op reg1, reg2" instruction. @@ -1570,7 +1621,8 @@ instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg) if (varTypeIsFloating(srcType)) return INS_vmov; #else - assert(!varTypeIsFloating(srcType)); + if (varTypeIsFloating(srcType)) + return INS_mov; #endif #if defined(TARGET_XARCH) diff --git a/src/coreclr/jit/simdcodegenxarch.cpp b/src/coreclr/jit/simdcodegenxarch.cpp index 0ceac8c6c26fd..4b0b6cfd0b94e 100644 --- a/src/coreclr/jit/simdcodegenxarch.cpp +++ b/src/coreclr/jit/simdcodegenxarch.cpp @@ -405,21 +405,7 @@ void CodeGen::genSIMDScalarMove( switch (moveType) { case SMT_PreserveUpper: - if (srcReg != targetReg) - { - instruction ins = ins_Store(baseType); - if (GetEmitter()->IsDstSrcSrcAVXInstruction(ins)) - { - // In general, when we use a three-operands move instruction, we want to merge the src with - // itself. This is an exception in that we actually want the "merge" behavior, so we must - // specify it with all 3 operands. 
- inst_RV_RV_RV(ins, targetReg, targetReg, srcReg, emitTypeSize(baseType)); - } - else - { - inst_RV_RV(ins, targetReg, srcReg, baseType, emitTypeSize(baseType)); - } - } + GetEmitter()->emitIns_SIMD_R_R_R(ins_Store(baseType), emitTypeSize(baseType), targetReg, targetReg, srcReg); break; case SMT_ZeroInitUpper: @@ -447,18 +433,13 @@ void CodeGen::genSIMDScalarMove( else { genSIMDZero(targetType, TYP_FLOAT, targetReg); - inst_RV_RV(ins_Store(baseType), targetReg, srcReg); + inst_Mov(baseType, targetReg, srcReg, /* canSkip */ false); } } break; case SMT_ZeroInitUpper_SrcHasUpperZeros: - if (srcReg != targetReg) - { - instruction ins = ins_Copy(baseType); - assert(!GetEmitter()->IsDstSrcSrcAVXInstruction(ins)); - inst_RV_RV(ins, targetReg, srcReg, baseType, emitTypeSize(baseType)); - } + inst_Mov(baseType, targetReg, srcReg, /* canSkip */ true); break; default: @@ -532,12 +513,12 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) // vpbroadcastq targetReg, targetReg regNumber op1loReg = genConsumeReg(op1lo); - inst_RV_RV(ins_Copy(op1loReg, TYP_FLOAT), targetReg, op1loReg, TYP_INT); + inst_Mov(TYP_FLOAT, targetReg, op1loReg, /* canSkip */ false, emitActualTypeSize(TYP_INT)); regNumber tmpReg = simdNode->GetSingleTempReg(); regNumber op1hiReg = genConsumeReg(op1hi); - inst_RV_RV(ins_Copy(op1loReg, TYP_FLOAT), tmpReg, op1hiReg, TYP_INT); + inst_Mov(TYP_FLOAT, tmpReg, op1hiReg, /* canSkip */ false, emitActualTypeSize(TYP_INT)); ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, TYP_SIMD16); GetEmitter()->emitIns_R_I(ins, EA_16BYTE, tmpReg, 4); // shift left by 4 bytes @@ -596,7 +577,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) regNumber srcReg = genConsumeReg(op1); if (baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG) { - inst_RV_RV(ins_Copy(srcReg, TYP_FLOAT), targetReg, srcReg, baseType, emitTypeSize(baseType)); + inst_Mov(TYP_FLOAT, targetReg, srcReg, /* canSkip */ false, 
emitTypeSize(baseType)); srcReg = targetReg; } @@ -645,10 +626,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) } else // Vector { - if (op1Reg != targetReg) - { - inst_RV_RV(ins_Copy(op1Reg, TYP_FLOAT), targetReg, op1Reg, baseType, emitTypeSize(baseType)); - } + inst_Mov(TYP_FLOAT, targetReg, op1Reg, /* canSkip */ true, emitTypeSize(baseType)); } ins = getOpForSIMDIntrinsic(SIMDIntrinsicShuffleSSE2, baseType); @@ -736,10 +714,7 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) noway_assert(offset == simdNode->GetSimdSize()); // Load the initialized value. - if (targetReg != vectorReg) - { - inst_RV_RV(ins_Copy(targetType), targetReg, vectorReg, targetType, emitActualTypeSize(targetType)); - } + inst_Mov(targetType, targetReg, vectorReg, /* canSkip */ true); genProduceReg(simdNode); } @@ -764,10 +739,14 @@ void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) regNumber op1Reg = genConsumeReg(op1); instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType); - if (simdNode->gtSIMDIntrinsicID != SIMDIntrinsicCast || targetReg != op1Reg) + if (simdNode->gtSIMDIntrinsicID != SIMDIntrinsicCast) { inst_RV_RV(ins, targetReg, op1Reg, targetType, emitActualTypeSize(targetType)); } + else + { + inst_Mov(targetType, targetReg, op1Reg, /* canSkip */ true); + } genProduceReg(simdNode); } @@ -813,11 +792,8 @@ void CodeGen::genSIMDIntrinsic32BitConvert(GenTreeSIMD* simdNode) // vsubps targetReg, tmpReg (convert upper 16 bits of src and put it into targetReg) // vcvtdq2ps tmpReg2, tmpReg2 (convert lower 16 bits of src and put it into tmpReg2) // vaddps targetReg, tmpReg2 (add upper 16 bits and lower 16 bits) - inst_RV_RV(INS_movdqu, tmpReg2, op1Reg, baseType, emitActualTypeSize(targetType)); - if (targetReg != op1Reg) - { - inst_RV_RV(INS_movdqu, targetReg, op1Reg, baseType, emitActualTypeSize(targetType)); - } + inst_Mov(targetType, tmpReg2, op1Reg, /* canSkip */ false); + inst_Mov(targetType, targetReg, op1Reg, /* canSkip */ 
true); // prepare upper 16 bits GetEmitter()->emitIns_R_I(INS_psrld, emitActualTypeSize(targetType), targetReg, 16); @@ -829,12 +805,12 @@ void CodeGen::genSIMDIntrinsic32BitConvert(GenTreeSIMD* simdNode) // prepare mask #ifdef TARGET_AMD64 GetEmitter()->emitIns_R_I(INS_mov, EA_8BYTE, tmpIntReg, (ssize_t)0X5300000053000000); - inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_ULONG); + inst_Mov(targetType, tmpReg, tmpIntReg, /* canSkip */ false, emitActualTypeSize(TYP_ULONG)); #else if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) { GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X53000000); - inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT); + inst_Mov(targetType, tmpReg, tmpIntReg, /* canSkip */ false, emitActualTypeSize(TYP_UINT)); } else { @@ -894,13 +870,13 @@ void CodeGen::genSIMDLo64BitConvert(SIMDIntrinsicID intrinsicID, instruction ins = getOpForSIMDIntrinsic(intrinsicID, baseType); if (intrinsicID == SIMDIntrinsicConvertToDouble) { - inst_RV_RV(INS_movd, tmpIntReg, tmpReg, TYP_LONG); + inst_Mov(TYP_LONG, tmpIntReg, tmpReg, /* canSkip */ false); inst_RV_RV(ins, targetReg, tmpIntReg, baseType, emitActualTypeSize(baseType)); } else { inst_RV_RV(ins, tmpIntReg, tmpReg, baseType, emitActualTypeSize(baseType)); - inst_RV_RV(INS_movd, targetReg, tmpIntReg, TYP_LONG); + inst_Mov(simdType, targetReg, tmpIntReg, /* canSkip */ false, emitActualTypeSize(TYP_LONG)); } } @@ -975,11 +951,8 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) // vorpd tmpReg2, tmpReg // vsubpd tmpReg2, tmpReg (convert lower 32 bits of src and put it into tmpReg2) // vaddpd targetReg, tmpReg2 (add upper 32 bits and lower 32 bits together) - inst_RV_RV(INS_movdqu, tmpReg2, op1Reg, baseType, emitActualTypeSize(simdType)); - if (targetReg != op1Reg) - { - inst_RV_RV(INS_movdqu, targetReg, op1Reg, baseType, emitActualTypeSize(simdType)); - } + inst_Mov(simdType, tmpReg2, op1Reg, /* canSkip */ false); + inst_Mov(simdType, targetReg, op1Reg, /* canSkip */ 
true); // prepare upper 32 bits GetEmitter()->emitIns_R_I(INS_psrlq, emitActualTypeSize(simdType), targetReg, 32); @@ -991,10 +964,10 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) // prepare mask for converting upper 32 bits #ifdef TARGET_AMD64 GetEmitter()->emitIns_R_I(INS_mov, EA_8BYTE, tmpIntReg, (ssize_t)0X4530000000000000); - inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_ULONG); + inst_Mov(simdType, tmpReg, tmpIntReg, /* canSkip */ false, emitActualTypeSize(TYP_ULONG)); #else GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X45300000); - inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT); + inst_Mov(simdType, tmpReg, tmpIntReg, /* canSkip */ false, emitActualTypeSize(TYP_UINT)); GetEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4); #endif if (level == SIMD_AVX2_Supported) @@ -1013,10 +986,10 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) // prepare mask for converting lower 32 bits #ifdef TARGET_AMD64 GetEmitter()->emitIns_R_I(INS_mov, EA_8BYTE, tmpIntReg, (ssize_t)0X4330000000000000); - inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_ULONG); + inst_Mov(simdType, tmpReg, tmpIntReg, /* canSkip */ false, emitActualTypeSize(TYP_ULONG)); #else GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X43300000); - inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT); + inst_Mov(simdType, tmpReg, tmpIntReg, /* canSkip */ false, emitActualTypeSize(TYP_UINT)); GetEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4); #endif if (level == SIMD_AVX2_Supported) @@ -1047,7 +1020,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) GetEmitter()->emitIns_R_R_I(INS_vextracti128, EA_32BYTE, tmpReg, op1Reg, 0x01); // Put v[3] (the high-order element) in tmpReg2 and convert it. 
- inst_RV_RV(ins_Copy(simdType), tmpReg2, tmpReg, simdType, emitActualTypeSize(simdType)); + inst_Mov(simdType, tmpReg2, tmpReg, /* canSkip */ false); GetEmitter()->emitIns_R_I(rightShiftIns, emitActualTypeSize(simdType), tmpReg2, 8); genSIMDLo64BitConvert(intrinsicID, simdType, baseType, tmpReg2, tmpIntReg, tmpReg2); @@ -1061,7 +1034,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) } // Put v[1] in tmpReg. - inst_RV_RV(ins_Copy(simdType), tmpReg, op1Reg, simdType, emitActualTypeSize(simdType)); + inst_Mov(simdType, tmpReg, op1Reg, /* canSkip */ false); GetEmitter()->emitIns_R_I(rightShiftIns, emitActualTypeSize(simdType), tmpReg, 8); // At this point we have v[1] in the low-order 64-bits of tmpReg. Convert it. @@ -1074,10 +1047,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) genSIMDLo64BitConvert(intrinsicID, simdType, baseType, op1Reg, tmpIntReg, tmpReg); // Merge or copy the results (only at this point are we done with op1Reg). - if (tmpReg != targetReg) - { - inst_RV_RV(INS_movaps, targetReg, tmpReg, simdType, emitActualTypeSize(simdType)); - } + inst_Mov(simdType, targetReg, tmpReg, /* canSkip */ true); if (level == SIMD_AVX2_Supported) { @@ -1085,17 +1055,17 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) } #else // get the sign bit and put it in tmpReg3 - inst_RV_RV(INS_movdqu, tmpReg3, op1Reg, baseType, emitActualTypeSize(simdType)); + inst_Mov(simdType, tmpReg3, op1Reg, /* canSkip */ false); GetEmitter()->emitIns_R_I(INS_psrlq, emitActualTypeSize(simdType), tmpReg3, 63); GetEmitter()->emitIns_R_I(INS_psllq, emitActualTypeSize(simdType), tmpReg3, 63); // get the absolute value of src and put it into tmpReg2 and targetReg - inst_RV_RV(INS_movdqu, tmpReg2, op1Reg, baseType, emitActualTypeSize(simdType)); + inst_Mov(simdType, tmpReg2, op1Reg, /* canSkip */ false); GetEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(simdType), tmpReg, op1Reg, (int8_t)SHUFFLE_WWYY); 
GetEmitter()->emitIns_R_I(INS_psrad, emitActualTypeSize(simdType), tmpReg, 32); inst_RV_RV(INS_pxor, tmpReg2, tmpReg, baseType, emitActualTypeSize(simdType)); inst_RV_RV(INS_psubq, tmpReg2, tmpReg, baseType, emitActualTypeSize(simdType)); - inst_RV_RV(INS_movdqu, targetReg, tmpReg2, baseType, emitActualTypeSize(simdType)); + inst_Mov(simdType, targetReg, tmpReg2, /* canSkip */ false); // prepare upper 32 bits GetEmitter()->emitIns_R_I(INS_psrlq, emitActualTypeSize(simdType), targetReg, 32); @@ -1106,7 +1076,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) // prepare mask for converting upper 32 bits GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X45300000); - inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT); + inst_Mov(simdType, tmpReg, tmpIntReg, /* canSkip */ false, emitActualTypeSize(TYP_UINT)); GetEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4); if (level == SIMD_AVX2_Supported) @@ -1124,7 +1094,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) // prepare mask for converting lower 32 bits GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X43300000); - inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT); + inst_Mov(simdType, tmpReg, tmpIntReg, /* canSkip */ false, emitActualTypeSize(TYP_UINT)); GetEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4); if (level == SIMD_AVX2_Supported) @@ -1158,7 +1128,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) GetEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg, op1Reg, 0x01); // Put v[3] (the high-order element) in tmpReg2 and convert it. 
- inst_RV_RV(ins_Copy(simdType), tmpReg2, tmpReg, simdType, emitActualTypeSize(simdType)); + inst_Mov(simdType, tmpReg2, tmpReg, /* canSkip */ false); GetEmitter()->emitIns_R_I(rightShiftIns, emitActualTypeSize(simdType), tmpReg2, 8); genSIMDLo64BitConvert(intrinsicID, simdType, baseType, tmpReg2, tmpIntReg, tmpReg2); @@ -1173,7 +1143,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) } // Put v[1] in tmpReg. - inst_RV_RV(ins_Copy(simdType), tmpReg, op1Reg, simdType, emitActualTypeSize(simdType)); + inst_Mov(simdType, tmpReg, op1Reg, /* canSkip */ false); GetEmitter()->emitIns_R_I(rightShiftIns, emitActualTypeSize(simdType), tmpReg, 8); // At this point we have v[1] in the low-order 64-bits of tmpReg. Convert it. @@ -1218,10 +1188,7 @@ void CodeGen::genSIMDExtractUpperHalf(GenTreeSIMD* simdNode, regNumber srcReg, r else { instruction shiftIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16); - if (tgtReg != srcReg) - { - inst_RV_RV(ins_Copy(simdType), tgtReg, srcReg, simdType, emitSize); - } + inst_Mov(simdType, tgtReg, srcReg, /* canSkip */ true); GetEmitter()->emitIns_R_I(shiftIns, emitSize, tgtReg, 8); } } @@ -1283,9 +1250,9 @@ void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) assert((ival >= 0) && (ival <= 255)); GetEmitter()->emitIns_R_R_I(INS_vpermq, emitSize, targetReg, op1Reg, (int8_t)ival); } - else if (targetReg != op1Reg) + else { - inst_RV_RV(ins_Copy(simdType), targetReg, op1Reg, simdType, emitSize); + inst_Mov(simdType, targetReg, op1Reg, /* canSkip */ true); } genSIMDZero(simdType, baseType, tmpReg); @@ -1379,7 +1346,7 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) GetEmitter()->emitIns_R_R_I(INS_vextracti128, EA_32BYTE, tmpReg, op1Reg, 0x01); GetEmitter()->emitIns_R_R_I(INS_vextracti128, EA_32BYTE, tmpReg2, op2Reg, 0x01); GetEmitter()->emitIns_R_R_I(INS_vinserti128, EA_32BYTE, tmpReg, tmpReg2, 0x01); - inst_RV_RV(ins_Copy(simdType), tmpReg2, op1Reg, simdType, emitSize); + 
inst_Mov(simdType, tmpReg2, op1Reg, /* canSkip */ false, emitSize); GetEmitter()->emitIns_R_R_I(INS_vinserti128, EA_32BYTE, tmpReg2, op2Reg, 0x01); GetEmitter()->emitIns_R_R_I(INS_pshufd, emitSize, tmpReg, tmpReg, (int8_t)SHUFFLE_XXZX); GetEmitter()->emitIns_R_R_I(INS_pshufd, emitSize, targetReg, tmpReg2, (int8_t)SHUFFLE_XXZX); @@ -1446,8 +1413,8 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) { regNumber tmpReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT); - inst_RV_RV(ins_Copy(simdType), targetReg, op1Reg, simdType, emitSize); - inst_RV_RV(ins_Copy(simdType), tmpReg, op2Reg, simdType, emitSize); + inst_Mov(simdType, targetReg, op1Reg, /* canSkip */ false, emitSize); + inst_Mov(simdType, tmpReg, op2Reg, /* canSkip */ false, emitSize); instruction tmpShiftRight = shiftRightIns; if ((baseType == TYP_INT || baseType == TYP_UINT) && level == SIMD_SSE2_Supported) @@ -1507,9 +1474,9 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) { otherReg = op1Reg; } - else if (op1Reg != targetReg) + else { - inst_RV_RV(ins_Copy(targetType), targetReg, op1Reg, targetType, emitActualTypeSize(targetType)); + inst_Mov(targetType, targetReg, op1Reg, /* canSkip */ true); } inst_RV_RV(ins, targetReg, otherReg, targetType, emitActualTypeSize(targetType)); @@ -1572,7 +1539,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) } else { - inst_RV_RV(ins_Copy(targetType), targetReg, op1Reg, targetType, emitActualTypeSize(targetType)); + inst_Mov(targetType, targetReg, op1Reg, /* canSkip */ false); } } @@ -1797,10 +1764,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) // mov_xmm2i targetReg, tmpReg if (varTypeIsFloating(baseType)) { - if (targetReg != srcReg) - { - inst_RV_RV(ins_Copy(simdType), targetReg, srcReg, simdType, emitActualTypeSize(simdType)); - } + inst_Mov(simdType, targetReg, srcReg, /* canSkip */ true); if (byteShiftCnt != 0) { @@ -1858,7 +1822,8 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) if 
(ZeroOrSignExtnReqd) { // Zero/sign extend the byte/short to 32-bits - inst_RV_RV(ins_Move_Extend(baseType, false), targetReg, targetReg, baseType, emitTypeSize(baseType)); + inst_Mov_Extend(baseType, /* srcInReg */ false, targetReg, targetReg, /* canSkip */ false, + emitTypeSize(baseType)); } } else @@ -1869,10 +1834,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) { assert(tmpReg != REG_NA); - if (tmpReg != srcReg) - { - inst_RV_RV(ins_Copy(simdType), tmpReg, srcReg, simdType, emitActualTypeSize(simdType)); - } + inst_Mov(simdType, tmpReg, srcReg, /* canSkip */ true); assert((byteShiftCnt > 0) && (byteShiftCnt <= 32)); instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16); @@ -1884,7 +1846,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) } assert(tmpReg != REG_NA); - inst_RV_RV(ins_Copy(tmpReg, baseType), targetReg, tmpReg, baseType); + inst_Mov(baseType, targetReg, tmpReg, /* canSkip */ false); } } @@ -1948,10 +1910,7 @@ void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) regNumber op2Reg = op2->GetRegNum(); // TODO-CQ: For AVX we don't need to do a copy because it supports 3 operands plus immediate. - if (targetReg != op1Reg) - { - inst_RV_RV(ins_Copy(targetType), targetReg, op1Reg, targetType, emitActualTypeSize(targetType)); - } + inst_Mov(targetType, targetReg, op1Reg, /* canSkip */ true); // Right now this intrinsic is supported only for float base type vectors. 
// If in future need to support on other base type vectors, the below @@ -1965,7 +1924,7 @@ void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) assert(genIsValidIntReg(tmpReg)); // Move the value from xmm reg to an int reg - inst_RV_RV(ins_Copy(op2Reg, TYP_INT), tmpReg, op2Reg, baseType); + inst_Mov(TYP_INT, tmpReg, op2Reg, /* canSkip */ false, emitActualTypeSize(baseType)); assert((index >= 0) && (index <= 15)); @@ -2012,10 +1971,7 @@ void CodeGen::genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode) assert(targetReg != REG_NA); regNumber op1Reg = genConsumeReg(op1); - if (targetReg != op1Reg) - { - inst_RV_RV(ins_Copy(targetType), targetReg, op1Reg, targetType, emitActualTypeSize(targetType)); - } + inst_Mov(targetType, targetReg, op1Reg, /* canSkip */ true); instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType); assert((shuffleControl >= 0) && (shuffleControl <= 255));