Skip to content

Commit

Permalink
[AMDGPU] Create matchPERM helper from performOrCombine PERM matching …
Browse files Browse the repository at this point in the history
…code.

Pulled out as NFC(ish) pre-commit from D159533
  • Loading branch information
RKSimon committed Sep 22, 2023
1 parent bd02816 commit 6e3827a
Showing 1 changed file with 85 additions and 74 deletions.
159 changes: 85 additions & 74 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11003,6 +11003,89 @@ static bool hasNon16BitAccesses(uint64_t PermMask, SDValue &Op,
return !addresses16Bits(Low16) || !addresses16Bits(Hi16);
}

/// Try to express the i32 value produced by \p N as a single AMDGPUISD::PERM
/// byte shuffle of at most two source values.
///
/// Each of the four result bytes is traced back to the value that provides it
/// via calculateByteProvider. The match is abandoned when \p N is not i32,
/// when a byte cannot be traced or is a known constant zero, or when the
/// bytes originate from more than two distinct sources.
///
/// \param N   Node whose result 0 is analysed.
/// \param DCI Combiner state; only its SelectionDAG is used.
/// \returns the single 32-bit source bitcast to i32 when the shuffle would
/// merely reproduce it, a new AMDGPUISD::PERM node when hasNon16BitAccesses
/// reports true for the computed mask, and an empty SDValue otherwise.
static SDValue matchPERM(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);

  if (VT != MVT::i32)
    return SDValue();

  // VT is known to be MVT::i32, so we need to provide 4 bytes.
  constexpr int NumBytes = 4;
  SmallVector<ByteProvider<SDValue>, NumBytes> PermNodes;
  for (int i = 0; i < NumBytes; ++i) {
    // Find the ByteProvider that provides the ith byte of the result of N.
    std::optional<ByteProvider<SDValue>> P =
        calculateByteProvider(SDValue(N, 0), i, 0, /*StartingIndex = */ i);
    // TODO support constantZero
    if (!P || P->isConstantZero())
      return SDValue();

    PermNodes.push_back(*P);
  }
  // Reaching this point means exactly NumBytes providers were collected; every
  // other path through the loop above has already returned.

  // Byte i of the result is described by bits [8*i, 8*i+8) of the mask, which
  // relies on little-endian byte numbering. Checked once rather than on every
  // iteration of the loop below.
  assert(!DAG.getDataLayout().isBigEndian());

  const int FirstSrc = 0;
  std::optional<int> SecondSrc;
  uint64_t PermMask = 0x00000000;
  for (int i = 0; i < NumBytes; ++i) {
    const ByteProvider<SDValue> &PermOp = PermNodes[i];
    // Since the mask is applied to Src1:Src2, Src1 bytes must be offset
    // by sizeof(Src2) = 4
    int SrcByteAdjust = 4;

    if (!PermOp.hasSameSrc(PermNodes[FirstSrc])) {
      // A third distinct source cannot be encoded; give up.
      if (SecondSrc.has_value() &&
          !PermOp.hasSameSrc(PermNodes[*SecondSrc]))
        return SDValue();

      // Set the index of the second distinct Src node
      SecondSrc = i;
      assert(!(PermNodes[*SecondSrc].Src->getValueSizeInBits() % 8));
      SrcByteAdjust = 0;
    }
    assert(PermOp.SrcOffset + SrcByteAdjust < 8);
    PermMask |= (PermOp.SrcOffset + SrcByteAdjust) << (i * 8);
  }

  SDValue Op = *PermNodes[FirstSrc].Src;
  SDValue OtherOp = SecondSrc.has_value() ? *PermNodes[*SecondSrc].Src
                                          : *PermNodes[FirstSrc].Src;

  // Check that we are not just extracting the bytes in order from an op
  if (Op == OtherOp && Op.getValueSizeInBits() == 32) {
    int Low16 = PermMask & 0xffff;
    int Hi16 = (PermMask & 0xffff0000) >> 16;

    bool WellFormedLow = (Low16 == 0x0504) || (Low16 == 0x0100);
    bool WellFormedHi = (Hi16 == 0x0706) || (Hi16 == 0x0302);

    // The perm op would really just produce Op. So combine into Op
    if (WellFormedLow && WellFormedHi)
      return DAG.getBitcast(MVT::i32, Op);
  }

  if (!hasNon16BitAccesses(PermMask, Op, OtherOp))
    return SDValue();

  SDLoc DL(N);
  assert(Op.getValueType().isByteSized() &&
         OtherOp.getValueType().isByteSized());

  // If the ultimate src is less than 32 bits, then we will only be
  // using bytes 0: Op.getValueSizeInBytes() - 1 in the or.
  // CalculateByteProvider would not have returned Op as source if we
  // used a byte that is outside its ValueType. Thus, we are free to
  // ANY_EXTEND as the extended bits are dont-cares.
  Op = DAG.getBitcastedAnyExtOrTrunc(Op, DL, MVT::i32);
  OtherOp = DAG.getBitcastedAnyExtOrTrunc(OtherOp, DL, MVT::i32);

  return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op, OtherOp,
                     DAG.getConstant(PermMask, DL, MVT::i32));
}

SDValue SITargetLowering::performOrCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
Expand Down Expand Up @@ -11116,80 +11199,8 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
}
}
if (LHSMask == ~0u || RHSMask == ~0u) {
SmallVector<ByteProvider<SDValue>, 8> PermNodes;

// VT is known to be MVT::i32, so we need to provide 4 bytes.
assert(VT == MVT::i32);
for (int i = 0; i < 4; i++) {
// Find the ByteProvider that provides the ith byte of the result of OR
std::optional<ByteProvider<SDValue>> P =
calculateByteProvider(SDValue(N, 0), i, 0, /*StartingIndex = */ i);
// TODO support constantZero
if (!P || P->isConstantZero())
return SDValue();

PermNodes.push_back(*P);
}
if (PermNodes.size() != 4)
return SDValue();

int FirstSrc = 0;
std::optional<int> SecondSrc;
uint64_t PermMask = 0x00000000;
for (size_t i = 0; i < PermNodes.size(); i++) {
auto PermOp = PermNodes[i];
// Since the mask is applied to Src1:Src2, Src1 bytes must be offset
// by sizeof(Src2) = 4
int SrcByteAdjust = 4;

if (!PermOp.hasSameSrc(PermNodes[FirstSrc])) {
if (SecondSrc.has_value())
if (!PermOp.hasSameSrc(PermNodes[*SecondSrc]))
return SDValue();

// Set the index of the second distinct Src node
SecondSrc = i;
assert(!(PermNodes[*SecondSrc].Src->getValueSizeInBits() % 8));
SrcByteAdjust = 0;
}
assert(PermOp.SrcOffset + SrcByteAdjust < 8);
assert(!DAG.getDataLayout().isBigEndian());
PermMask |= (PermOp.SrcOffset + SrcByteAdjust) << (i * 8);
}

SDValue Op = *PermNodes[FirstSrc].Src;
SDValue OtherOp = SecondSrc.has_value() ? *PermNodes[*SecondSrc].Src
: *PermNodes[FirstSrc].Src;

// Check that we are not just extracting the bytes in order from an op
if (Op == OtherOp && Op.getValueSizeInBits() == 32) {
int Low16 = PermMask & 0xffff;
int Hi16 = (PermMask & 0xffff0000) >> 16;

bool WellFormedLow = (Low16 == 0x0504) || (Low16 == 0x0100);
bool WellFormedHi = (Hi16 == 0x0706) || (Hi16 == 0x0302);

// The perm op would really just produce Op. So combine into Op
if (WellFormedLow && WellFormedHi)
return DAG.getBitcast(MVT::getIntegerVT(32), Op);
}

if (hasNon16BitAccesses(PermMask, Op, OtherOp)) {
SDLoc DL(N);
assert(Op.getValueType().isByteSized() &&
OtherOp.getValueType().isByteSized());

// If the ultimate src is less than 32 bits, then we will only be
// using bytes 0: Op.getValueSizeInBytes() - 1 in the or.
// CalculateByteProvider would not have returned Op as source if we
// used a byte that is outside its ValueType. Thus, we are free to
// ANY_EXTEND as the extended bits are dont-cares.
Op = DAG.getBitcastedAnyExtOrTrunc(Op, DL, MVT::i32);
OtherOp = DAG.getBitcastedAnyExtOrTrunc(OtherOp, DL, MVT::i32);

return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op, OtherOp,
DAG.getConstant(PermMask, DL, MVT::i32));
}
if (SDValue Perm = matchPERM(N, DCI))
return Perm;
}
}

Expand Down

0 comments on commit 6e3827a

Please sign in to comment.