Skip to content

Commit

Permalink
[AArch64] Add costs for ST3 and ST4 instructions, modelled as store(shuffle).
Browse files Browse the repository at this point in the history

This tries to add some costs for the shuffle in a ST3/ST4 instruction, which
are represented in LLVM IR as store(interleaving shuffle). In order to detect
the store, it needs to add a CxtI context instruction to check the users of the
shuffle. ST3 and ST4 are added; ST2 should be a zip1 shuffle, which will be
added in another patch.

It should help fix some of the regressions from llvm#87510.
  • Loading branch information
davemgreen committed Apr 7, 2024
1 parent d57d094 commit 2faddc9
Show file tree
Hide file tree
Showing 22 changed files with 122 additions and 92 deletions.
26 changes: 13 additions & 13 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1291,12 +1291,11 @@ class TargetTransformInfo {
/// passed through \p Args, which helps improve the cost estimation in some
/// cases, like in broadcast loads.
/// NOTE: For subvector extractions Tp represents the source type.
InstructionCost
getShuffleCost(ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask = std::nullopt,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
int Index = 0, VectorType *SubTp = nullptr,
ArrayRef<const Value *> Args = std::nullopt) const;
InstructionCost getShuffleCost(
ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = std::nullopt,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, int Index = 0,
VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr) const;

/// Represents a hint about the context in which a cast is used.
///
Expand Down Expand Up @@ -2008,11 +2007,10 @@ class TargetTransformInfo::Concept {
const SmallBitVector &OpcodeMask,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const = 0;

virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) = 0;
virtual InstructionCost
getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
ArrayRef<const Value *> Args, const Instruction *CxtI) = 0;
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src, CastContextHint CCH,
TTI::TargetCostKind CostKind,
Expand Down Expand Up @@ -2647,8 +2645,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args) override {
return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
ArrayRef<const Value *> Args,
const Instruction *CxtI) override {
return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
CxtI);
}
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
CastContextHint CCH,
Expand Down
44 changes: 25 additions & 19 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -579,10 +579,12 @@ class TargetTransformInfoImplBase {
return InstructionCost::getInvalid();
}

InstructionCost
getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt) const {
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr) const {
return 1;
}

Expand Down Expand Up @@ -1341,13 +1343,13 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
if (Shuffle->isExtractSubvectorMask(SubIndex))
return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
Mask, CostKind, SubIndex, VecTy,
Operands);
Operands, Shuffle);

if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
return TargetTTI->getShuffleCost(
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
Operands);
Operands, Shuffle);

int ReplicationFactor, VF;
if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
Expand All @@ -1374,7 +1376,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {

return TargetTTI->getShuffleCost(
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
AdjustMask, CostKind, 0, nullptr);
AdjustMask, CostKind, 0, nullptr, Shuffle);
}

// Narrowing shuffle - perform shuffle at original wider width and
Expand All @@ -1383,49 +1385,53 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {

InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
VecSrcTy, AdjustMask, CostKind, 0, nullptr);
VecSrcTy, AdjustMask, CostKind, 0, nullptr, Shuffle);

SmallVector<int, 16> ExtractMask(Mask.size());
std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
return ShuffleCost + TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector,
VecSrcTy, ExtractMask,
CostKind, 0, VecTy);
return ShuffleCost + TargetTTI->getShuffleCost(
TTI::SK_ExtractSubvector, VecSrcTy,
ExtractMask, CostKind, 0, VecTy, Shuffle);
}

if (Shuffle->isIdentity())
return 0;

if (Shuffle->isReverse())
return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Mask, CostKind,
0, nullptr, Operands);
0, nullptr, Operands, Shuffle);

if (Shuffle->isSelect())
return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Mask, CostKind,
0, nullptr, Operands);
0, nullptr, Operands, Shuffle);

if (Shuffle->isTranspose())
return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Mask,
CostKind, 0, nullptr, Operands);
CostKind, 0, nullptr, Operands,
Shuffle);

if (Shuffle->isZeroEltSplat())
return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Mask,
CostKind, 0, nullptr, Operands);
CostKind, 0, nullptr, Operands,
Shuffle);

if (Shuffle->isSingleSource())
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Mask,
CostKind, 0, nullptr, Operands);
CostKind, 0, nullptr, Operands,
Shuffle);

if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
return TargetTTI->getShuffleCost(
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands);
FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
Shuffle);

if (Shuffle->isSplice(SubIndex))
return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind,
SubIndex, nullptr, Operands);
SubIndex, nullptr, Operands, Shuffle);

return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask,
CostKind, 0, nullptr, Operands);
CostKind, 0, nullptr, Operands, Shuffle);
}
case Instruction::ExtractElement: {
auto *EEI = dyn_cast<ExtractElementInst>(U);
Expand Down
3 changes: 2 additions & 1 deletion llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1020,7 +1020,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt) {
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr) {
switch (improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp)) {
case TTI::SK_Broadcast:
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -916,9 +916,9 @@ InstructionCost TargetTransformInfo::getAltInstrCost(
InstructionCost TargetTransformInfo::getShuffleCost(
ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) const {
InstructionCost Cost =
TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind, Index, SubTp, Args);
ArrayRef<const Value *> Args, const Instruction *CxtI) const {
InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind,
Index, SubTp, Args, CxtI);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
Expand Down
29 changes: 21 additions & 8 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3815,18 +3815,30 @@ InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) {
return LegalizationCost * LT.first;
}

InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
InstructionCost AArch64TTIImpl::getShuffleCost(
TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
ArrayRef<const Value *> Args, const Instruction *CxtI) {
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);

// If we have a Mask, and the LT is being legalized somehow, split the Mask
// into smaller vectors and sum the cost of each shuffle.
if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
Tp->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {

// Check for ST3/ST4 instructions, which are represented in llvm IR as
// store(interleaving-shuffle). The shuffle cost could potentially be free,
// but we model it with a cost of LT.first so that ST3/ST4 have a higher
// cost than just the store.
if ((ShuffleVectorInst::isInterleaveMask(
Mask, 4, Tp->getElementCount().getKnownMinValue() * 2) ||
ShuffleVectorInst::isInterleaveMask(
Mask, 3, Tp->getElementCount().getKnownMinValue() * 2)) &&
!ShuffleVectorInst::isZeroEltSplatMask(
Mask, Tp->getElementCount().getKnownMinValue()))
return LT.first;

unsigned TpNumElts = Mask.size();
unsigned LTNumElts = LT.second.getVectorNumElements();
unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
Expand Down Expand Up @@ -3874,7 +3886,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (NumSources <= 2)
Cost += getShuffleCost(NumSources <= 1 ? TTI::SK_PermuteSingleSrc
: TTI::SK_PermuteTwoSrc,
NTp, NMask, CostKind, 0, nullptr, Args);
NTp, NMask, CostKind, 0, nullptr, Args, CxtI);
else if (any_of(enumerate(NMask), [&](const auto &ME) {
return ME.value() % LTNumElts == ME.index();
}))
Expand Down Expand Up @@ -4055,7 +4067,8 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// Restore optimal kind.
if (IsExtractSubvector)
Kind = TTI::SK_ExtractSubvector;
return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
CxtI);
}

static bool containsDecreasingPointers(Loop *TheLoop,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);

InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1127,7 +1127,8 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *VT, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
Kind = improveShuffleKindFromMask(Kind, Mask, VT, Index, SubTp);
// Treat extractsubvector as single op permutation.
bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);

bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1212,7 +1212,8 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
// Treat extractsubvector as single op permutation.
bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARM/ARMTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);

bool preferInLoopReduction(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,8 @@ InstructionCost HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, Type *SubTp,
ArrayRef<const Value *> Args) {
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
return 1;
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
Type *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,8 @@ InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, Type *SubTp,
ArrayRef<const Value *> Args) {
ArrayRef<const Value *> Args,
const Instruction *CxtI) {

InstructionCost CostFactor =
vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr);
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
Type *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);

std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);

InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
Expand Down
Loading

0 comments on commit 2faddc9

Please sign in to comment.