Skip to content

Commit

Permalink
SVE: Added Load2xVectorAndUnzip, Load3xVectorAndUnzip, `Load4xVec…
Browse files Browse the repository at this point in the history
…torAndUnzip` APIs (#102180)
  • Loading branch information
TIHan authored May 29, 2024
1 parent 13a984d commit 21aa3b7
Show file tree
Hide file tree
Showing 14 changed files with 1,743 additions and 0 deletions.
10 changes: 10 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26594,6 +26594,9 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
case NI_Sve_LoadVectorUInt16ZeroExtendToUInt64:
case NI_Sve_LoadVectorUInt32ZeroExtendToInt64:
case NI_Sve_LoadVectorUInt32ZeroExtendToUInt64:
case NI_Sve_Load2xVectorAndUnzip:
case NI_Sve_Load3xVectorAndUnzip:
case NI_Sve_Load4xVectorAndUnzip:
addr = Op(2);
break;
#endif // TARGET_ARM64
Expand Down Expand Up @@ -27116,6 +27119,13 @@ ClassLayout* GenTreeHWIntrinsic::GetLayout(Compiler* compiler) const
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x4:
return compiler->typGetBlkLayout(64);

case NI_Sve_Load2xVectorAndUnzip:
return compiler->typGetBlkLayout(compiler->getVectorTByteLength() * 2);
case NI_Sve_Load3xVectorAndUnzip:
return compiler->typGetBlkLayout(compiler->getVectorTByteLength() * 3);
case NI_Sve_Load4xVectorAndUnzip:
return compiler->typGetBlkLayout(compiler->getVectorTByteLength() * 4);

#endif // TARGET_ARM64

default:
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1603,6 +1603,14 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
// Op1 input is a vector. HWInstrinsic requires a mask.
retNode->AsHWIntrinsic()->Op(1) = gtNewSimdConvertVectorToMaskNode(retType, op1, simdBaseJitType, simdSize);
}

if (HWIntrinsicInfo::IsMultiReg(intrinsic))
{
assert(HWIntrinsicInfo::IsExplicitMaskedOperation(retNode->AsHWIntrinsic()->GetHWIntrinsicId()));
assert(HWIntrinsicInfo::IsMultiReg(retNode->AsHWIntrinsic()->GetHWIntrinsicId()));
retNode =
impStoreMultiRegValueToVar(retNode, sig->retTypeSigClass DEBUGARG(CorInfoCallConvExtension::Managed));
}
}

if (retType != nodeRetType)
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/hwintrinsic.h
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,7 @@ struct HWIntrinsicInfo
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
case NI_AdvSimd_LoadAndReplicateToVector64x2:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x2:
case NI_Sve_Load2xVectorAndUnzip:
return 2;

case NI_AdvSimd_LoadVector64x3AndUnzip:
Expand All @@ -841,6 +842,7 @@ struct HWIntrinsicInfo
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
case NI_AdvSimd_LoadAndReplicateToVector64x3:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x3:
case NI_Sve_Load3xVectorAndUnzip:
return 3;

case NI_AdvSimd_LoadVector64x4AndUnzip:
Expand All @@ -851,6 +853,7 @@ struct HWIntrinsicInfo
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
case NI_AdvSimd_LoadAndReplicateToVector64x4:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x4:
case NI_Sve_Load4xVectorAndUnzip:
return 4;
#endif

Expand Down
29 changes: 29 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2296,6 +2296,35 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
retNode = impStoreMultiRegValueToVar(op1, sig->retTypeSigClass DEBUGARG(CorInfoCallConvExtension::Managed));
break;
}

case NI_Sve_Load2xVectorAndUnzip:
case NI_Sve_Load3xVectorAndUnzip:
case NI_Sve_Load4xVectorAndUnzip:
{
info.compNeedsConsecutiveRegisters = true;

assert(sig->numArgs == 2);

op2 = impPopStack().val;
op1 = impPopStack().val;

if (op2->OperIs(GT_CAST))
{
// Although the API specifies a pointer, if what we have is a BYREF, that's what
// we really want, so throw away the cast.
if (op2->gtGetOp1()->TypeGet() == TYP_BYREF)
{
op2 = op2->gtGetOp1();
}
}

assert(HWIntrinsicInfo::IsMultiReg(intrinsic));
assert(HWIntrinsicInfo::IsExplicitMaskedOperation(intrinsic));

retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
break;
}

case NI_AdvSimd_LoadAndInsertScalarVector64x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
Expand Down
20 changes: 20 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1360,6 +1360,26 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}
break;

case NI_Sve_Load2xVectorAndUnzip:
case NI_Sve_Load3xVectorAndUnzip:
case NI_Sve_Load4xVectorAndUnzip:
{
#ifdef DEBUG
// Validates that consecutive registers were used properly.

assert(node->GetMultiRegCount(compiler) == (unsigned int)GetEmitter()->insGetSveReg1ListSize(ins));

regNumber argReg = targetReg;
for (unsigned int i = 0; i < node->GetMultiRegCount(compiler); i++)
{
assert(argReg == node->GetRegNumByIdx(i));
argReg = getNextSIMDRegWithWraparound(argReg);
}
#endif // DEBUG
GetEmitter()->emitIns_R_R_R_I(ins, emitSize, targetReg, op1Reg, op2Reg, 0, opt);
break;
}

case NI_Sve_StoreAndZipx2:
case NI_Sve_StoreAndZipx3:
case NI_Sve_StoreAndZipx4:
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt32,
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Load2xVectorAndUnzip, -1, 2, true, {INS_sve_ld2b, INS_sve_ld2b, INS_sve_ld2h, INS_sve_ld2h, INS_sve_ld2w, INS_sve_ld2w, INS_sve_ld2d, INS_sve_ld2d, INS_sve_ld2w, INS_sve_ld2d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, Load3xVectorAndUnzip, -1, 2, true, {INS_sve_ld3b, INS_sve_ld3b, INS_sve_ld3h, INS_sve_ld3h, INS_sve_ld3w, INS_sve_ld3w, INS_sve_ld3d, INS_sve_ld3d, INS_sve_ld3w, INS_sve_ld3d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, Load4xVectorAndUnzip, -1, 2, true, {INS_sve_ld4b, INS_sve_ld4b, INS_sve_ld4h, INS_sve_ld4h, INS_sve_ld4w, INS_sve_ld4w, INS_sve_ld4d, INS_sve_ld4d, INS_sve_ld4w, INS_sve_ld4d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, Max, -1, -1, false, {INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_fmax, INS_sve_fmax}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, MaxAcross, -1, -1, false, {INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_fmaxv, INS_sve_fmaxv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, MaxNumber, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmaxnm, INS_sve_fmaxnm}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
Expand Down
13 changes: 13 additions & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1781,6 +1781,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
break;
}

case NI_Sve_Load2xVectorAndUnzip:
case NI_Sve_Load3xVectorAndUnzip:
case NI_Sve_Load4xVectorAndUnzip:
{
assert(intrin.op1 != nullptr);
assert(intrin.op2 != nullptr);
assert(intrinsicTree->OperIsMemoryLoadOrStore());
srcCount += BuildAddrUses(intrin.op2);
BuildConsecutiveRegistersForDef(intrinsicTree, dstCount);
*pDstCount = dstCount;
break;
}

case NI_Sve_StoreAndZipx2:
case NI_Sve_StoreAndZipx3:
case NI_Sve_StoreAndZipx4:
Expand Down
Loading

0 comments on commit 21aa3b7

Please sign in to comment.