Skip to content

Commit

Permalink
[AMDGPU] Select gfx1150 SALU Float instructions
Browse files Browse the repository at this point in the history
  • Loading branch information
mbrkusanin committed Sep 20, 2023
1 parent ddd6474 commit 29db22e
Show file tree
Hide file tree
Showing 23 changed files with 4,282 additions and 157 deletions.
19 changes: 19 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,9 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::BRCOND:
SelectBRCOND(N);
return;
case ISD::FP_EXTEND:
SelectFP_EXTEND(N);
return;
case AMDGPUISD::CVT_PKRTZ_F16_F32:
case AMDGPUISD::CVT_PKNORM_I16_F32:
case AMDGPUISD::CVT_PKNORM_U16_F32:
Expand Down Expand Up @@ -2303,6 +2306,22 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
VCC.getValue(0));
}

/// Custom selection hook for ISD::FP_EXTEND nodes.
///
/// When the subtarget reports SALU float instructions, the node produces an
/// f32, the node is not divergent, and its f16 operand is recognized by
/// isExtractHiElt, the node is selected directly to S_CVT_HI_F32_F16 using
/// the operand that isExtractHiElt hands back. Every other case is forwarded
/// to the generated matcher through SelectCode.
void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {
  const bool IsUniformF32OnSALU = Subtarget->hasSALUFloatInsts() &&
                                  N->getValueType(0) == MVT::f32 &&
                                  !N->isDivergent();

  if (IsUniformF32OnSALU) {
    SDValue HiSrc = N->getOperand(0);
    // isExtractHiElt replaces HiSrc with the value it matched on success.
    // NOTE(review): presumably that is the 32-bit value whose upper half the
    // f16 operand was taken from — confirm against the helper's definition.
    if (HiSrc.getValueType() == MVT::f16 && isExtractHiElt(HiSrc, HiSrc)) {
      CurDAG->SelectNodeTo(N, AMDGPU::S_CVT_HI_F32_F16, N->getVTList(),
                           {HiSrc});
      return;
    }
  }

  // No special form matched: use the table-generated selection.
  SelectCode(N);
}

void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
// The address is assumed to be uniform, so if it ends up in a VGPR, it will
// be copied to an SGPR with readfirstlane.
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool isCBranchSCC(const SDNode *N) const;
void SelectBRCOND(SDNode *N);
void SelectFMAD_FMA(SDNode *N);
void SelectFP_EXTEND(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDSBvhStackIntrinsic(SDNode *N);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
Expand Down
166 changes: 139 additions & 27 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1211,36 +1211,104 @@ int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
}
}

if (Size != 32)
return -1;
if (Size == 32) {
switch (P) {
case CmpInst::ICMP_NE:
return AMDGPU::S_CMP_LG_U32;
case CmpInst::ICMP_EQ:
return AMDGPU::S_CMP_EQ_U32;
case CmpInst::ICMP_SGT:
return AMDGPU::S_CMP_GT_I32;
case CmpInst::ICMP_SGE:
return AMDGPU::S_CMP_GE_I32;
case CmpInst::ICMP_SLT:
return AMDGPU::S_CMP_LT_I32;
case CmpInst::ICMP_SLE:
return AMDGPU::S_CMP_LE_I32;
case CmpInst::ICMP_UGT:
return AMDGPU::S_CMP_GT_U32;
case CmpInst::ICMP_UGE:
return AMDGPU::S_CMP_GE_U32;
case CmpInst::ICMP_ULT:
return AMDGPU::S_CMP_LT_U32;
case CmpInst::ICMP_ULE:
return AMDGPU::S_CMP_LE_U32;
case CmpInst::FCMP_OEQ:
return AMDGPU::S_CMP_EQ_F32;
case CmpInst::FCMP_OGT:
return AMDGPU::S_CMP_GT_F32;
case CmpInst::FCMP_OGE:
return AMDGPU::S_CMP_GE_F32;
case CmpInst::FCMP_OLT:
return AMDGPU::S_CMP_LT_F32;
case CmpInst::FCMP_OLE:
return AMDGPU::S_CMP_LE_F32;
case CmpInst::FCMP_ONE:
return AMDGPU::S_CMP_LG_F32;
case CmpInst::FCMP_ORD:
return AMDGPU::S_CMP_O_F32;
case CmpInst::FCMP_UNO:
return AMDGPU::S_CMP_U_F32;
case CmpInst::FCMP_UEQ:
return AMDGPU::S_CMP_NLG_F32;
case CmpInst::FCMP_UGT:
return AMDGPU::S_CMP_NLE_F32;
case CmpInst::FCMP_UGE:
return AMDGPU::S_CMP_NLT_F32;
case CmpInst::FCMP_ULT:
return AMDGPU::S_CMP_NGE_F32;
case CmpInst::FCMP_ULE:
return AMDGPU::S_CMP_NGT_F32;
case CmpInst::FCMP_UNE:
return AMDGPU::S_CMP_NEQ_F32;
default:
llvm_unreachable("Unknown condition code!");
}
}

switch (P) {
case CmpInst::ICMP_NE:
return AMDGPU::S_CMP_LG_U32;
case CmpInst::ICMP_EQ:
return AMDGPU::S_CMP_EQ_U32;
case CmpInst::ICMP_SGT:
return AMDGPU::S_CMP_GT_I32;
case CmpInst::ICMP_SGE:
return AMDGPU::S_CMP_GE_I32;
case CmpInst::ICMP_SLT:
return AMDGPU::S_CMP_LT_I32;
case CmpInst::ICMP_SLE:
return AMDGPU::S_CMP_LE_I32;
case CmpInst::ICMP_UGT:
return AMDGPU::S_CMP_GT_U32;
case CmpInst::ICMP_UGE:
return AMDGPU::S_CMP_GE_U32;
case CmpInst::ICMP_ULT:
return AMDGPU::S_CMP_LT_U32;
case CmpInst::ICMP_ULE:
return AMDGPU::S_CMP_LE_U32;
default:
llvm_unreachable("Unknown condition code!");
if (Size == 16) {
if (!STI.hasSALUFloatInsts())
return -1;

switch (P) {
case CmpInst::FCMP_OEQ:
return AMDGPU::S_CMP_EQ_F16;
case CmpInst::FCMP_OGT:
return AMDGPU::S_CMP_GT_F16;
case CmpInst::FCMP_OGE:
return AMDGPU::S_CMP_GE_F16;
case CmpInst::FCMP_OLT:
return AMDGPU::S_CMP_LT_F16;
case CmpInst::FCMP_OLE:
return AMDGPU::S_CMP_LE_F16;
case CmpInst::FCMP_ONE:
return AMDGPU::S_CMP_LG_F16;
case CmpInst::FCMP_ORD:
return AMDGPU::S_CMP_O_F16;
case CmpInst::FCMP_UNO:
return AMDGPU::S_CMP_U_F16;
case CmpInst::FCMP_UEQ:
return AMDGPU::S_CMP_NLG_F16;
case CmpInst::FCMP_UGT:
return AMDGPU::S_CMP_NLE_F16;
case CmpInst::FCMP_UGE:
return AMDGPU::S_CMP_NLT_F16;
case CmpInst::FCMP_ULT:
return AMDGPU::S_CMP_NGE_F16;
case CmpInst::FCMP_ULE:
return AMDGPU::S_CMP_NGT_F16;
case CmpInst::FCMP_UNE:
return AMDGPU::S_CMP_NEQ_F16;
default:
llvm_unreachable("Unknown condition code!");
}
}

return -1;
}

bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {

MachineBasicBlock *BB = I.getParent();
const DebugLoc &DL = I.getDebugLoc();

Expand All @@ -1266,6 +1334,9 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
return Ret;
}

if (I.getOpcode() == AMDGPU::G_FCMP)
return false;

int Opcode = getV_CMPOpcode(Pred, Size, *Subtarget);
if (Opcode == -1)
return false;
Expand Down Expand Up @@ -2439,6 +2510,42 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
return false;
}

/// Checks whether \p In is defined as G_TRUNC(G_LSHR(X, 16)).
///
/// \param MRI  Register info used to walk the defining instructions.
/// \param In   Register to inspect.
/// \param Out  Set to X when the pattern matches; left unmodified otherwise.
/// \returns true if the pattern matched.
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In,
                           Register &Out) {
  // Bind the shift source to a temporary so that Out is only written once
  // the whole pattern has matched.
  Register ShiftedSrc;
  const bool Matched = mi_match(
      In, MRI, m_GTrunc(m_GLShr(m_Reg(ShiftedSrc), m_SpecificICst(16))));
  if (!Matched)
    return false;

  Out = ShiftedSrc;
  return true;
}

/// Manual selection of G_FPEXT for the SALU float path.
///
/// Handles only an s16 -> s32 extension whose destination is on the SGPR
/// register bank and whose source matches isExtractHiElt; that form is
/// emitted as S_CVT_HI_F32_F16 and \p I is erased.
///
/// \returns true if \p I was replaced and the destination register could be
/// constrained to SReg_32; false if this routine did not handle \p I (the
/// caller in select() then tries selectImpl).
bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  if (!Subtarget->hasSALUFloatInsts())
    return false;

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  if (DstBank->getID() != AMDGPU::SGPRRegBankID)
    return false;

  Register SrcReg = I.getOperand(1).getReg();

  // Only the 16-bit to 32-bit extension is handled here.
  if (MRI->getType(DstReg) != LLT::scalar(32) ||
      MRI->getType(SrcReg) != LLT::scalar(16))
    return false;

  // On success SrcReg is replaced by the register feeding the shift-by-16.
  if (!isExtractHiElt(*MRI, SrcReg, SrcReg))
    return false;

  BuildMI(*I.getParent(), &I, I.getDebugLoc(),
          TII.get(AMDGPU::S_CVT_HI_F32_F16), DstReg)
      .addUse(SrcReg);
  I.eraseFromParent();
  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineOperand &ImmOp = I.getOperand(1);
Expand Down Expand Up @@ -3471,7 +3578,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
case TargetOpcode::G_ICMP:
if (selectG_ICMP(I))
case TargetOpcode::G_FCMP:
if (selectG_ICMP_or_FCMP(I))
return true;
return selectImpl(I, *CoverageInfo);
case TargetOpcode::G_LOAD:
Expand Down Expand Up @@ -3508,6 +3616,10 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
selectImpl(I, *CoverageInfo))
return true;
return selectG_SZA_EXT(I);
case TargetOpcode::G_FPEXT:
if (selectG_FPEXT(I))
return true;
return selectImpl(I, *CoverageInfo);
case TargetOpcode::G_BRCOND:
return selectG_BRCOND(I);
case TargetOpcode::G_GLOBAL_VALUE:
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectPHI(MachineInstr &I) const;
bool selectG_TRUNC(MachineInstr &I) const;
bool selectG_SZA_EXT(MachineInstr &I) const;
bool selectG_FPEXT(MachineInstr &I) const;
bool selectG_CONSTANT(MachineInstr &I) const;
bool selectG_FNEG(MachineInstr &I) const;
bool selectG_FABS(MachineInstr &I) const;
Expand Down Expand Up @@ -129,7 +130,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
const AMDGPU::ImageDimIntrinsicInfo *Intr) const;
bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;
bool selectG_ICMP(MachineInstr &I) const;
bool selectG_ICMP_or_FCMP(MachineInstr &I) const;
bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
SmallVectorImpl<GEPInfo> &AddrInfo) const;
Expand Down
10 changes: 8 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1132,8 +1132,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.legalIf(all(typeInSet(0, {S1, S32}), isPointer(1)));

getActionDefinitionsBuilder(G_FCMP)
.legalForCartesianProduct({S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase)
auto &FCmpBuilder =
getActionDefinitionsBuilder(G_FCMP).legalForCartesianProduct(
{S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase);

if (ST.hasSALUFloatInsts())
FCmpBuilder.legalForCartesianProduct({S32}, {S16, S32});

FCmpBuilder
.widenScalarToNextPow2(1)
.clampScalar(1, S32, S64)
.scalarize(0);
Expand Down
Loading

0 comments on commit 29db22e

Please sign in to comment.