Revert "AMDGPU: Split block for si_end_cf"

This reverts commit 7a6ef30.

We discovered some internal test failures, so reverting for now.

Differential Revision: https://reviews.llvm.org/D61213
Change-Id: I2bf19b5c7f4b76295d469872be579ca3a2f5850c
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359363 91177308-0d34-0410-b5e6-96231b3b80d8
ROCm · Apr 27, 2019 · ca7ea08 · ca7ea08
1 parent c811b40
commit ca7ea08
Show file tree

Hide file tree

Showing 7 changed files with 73 additions and 227 deletions.
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1214,12 +1214,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     MI.setDesc(get(AMDGPU::S_XOR_B64));
     break;
 
-  case AMDGPU::S_OR_B64_term:
-    // This is only a terminator to get the correct spill code placement during
-    // register allocation.
-    MI.setDesc(get(AMDGPU::S_OR_B64));
-    break;
-
   case AMDGPU::S_ANDN2_B64_term:
     // This is only a terminator to get the correct spill code placement during
     // register allocation.
@@ -1704,7 +1698,6 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
     case AMDGPU::SI_MASK_BRANCH:
     case AMDGPU::S_MOV_B64_term:
     case AMDGPU::S_XOR_B64_term:
-    case AMDGPU::S_OR_B64_term:
     case AMDGPU::S_ANDN2_B64_term:
       break;
     case AMDGPU::SI_IF:

diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
@@ -189,7 +189,6 @@ class WrapTerminatorInst<SOP_Pseudo base_inst> : SPseudoInstSI<
 }
 
 def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>;
-def S_OR_B64_term : WrapTerminatorInst<S_OR_B64>;
 def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
 def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
 

diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -55,7 +55,6 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -80,11 +79,8 @@ class SILowerControlFlow : public MachineFunctionPass {
 private:
   const SIRegisterInfo *TRI = nullptr;
   const SIInstrInfo *TII = nullptr;
-  MachineRegisterInfo *MRI = nullptr;
   LiveIntervals *LIS = nullptr;
-  MachineDominatorTree *DT = nullptr;
-  MachineLoopInfo *MLI = nullptr;
-
+  MachineRegisterInfo *MRI = nullptr;
 
   void emitIf(MachineInstr &MI);
   void emitElse(MachineInstr &MI);
@@ -115,7 +111,7 @@ class SILowerControlFlow : public MachineFunctionPass {
     AU.addPreservedID(LiveVariablesID);
     AU.addPreservedID(MachineLoopInfoID);
     AU.addPreservedID(MachineDominatorsID);
-
+    AU.setPreservesCFG();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 };
@@ -392,99 +388,23 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
   MI.eraseFromParent();
 }
 
-// Insert \p Inst (which modifies exec) at \p InsPt in \p MBB, such that \p MBB
-// is split as necessary to keep the exec modification in its own block.
-static MachineBasicBlock *insertInstWithExecFallthrough(MachineBasicBlock &MBB,
-                                                        MachineInstr &MI,
-                                                        MachineInstr *NewMI,
-                                                        MachineDominatorTree *DT,
-                                                        LiveIntervals *LIS,
-                                                        MachineLoopInfo *MLI) {
-  assert(NewMI->isTerminator());
-
-  MachineBasicBlock::iterator InsPt = MI.getIterator();
-  if (std::next(MI.getIterator()) == MBB.end()) {
-    // Don't bother with a new block.
-    MBB.insert(InsPt, NewMI);
-    if (LIS)
-      LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
-    MI.eraseFromParent();
-    return &MBB;
-  }
-
-  MachineFunction *MF = MBB.getParent();
-  MachineBasicBlock *SplitMBB
-    = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-
-  MF->insert(++MachineFunction::iterator(MBB), SplitMBB);
-
-  // FIXME: This is working around a MachineDominatorTree API defect.
-  //
-  // If a previous pass split a critical edge, it may not have been applied to
-  // the DomTree yet. applySplitCriticalEdges is lazily applied, and inspects
-  // the CFG of the given block. Make sure to call a dominator tree method that
-  // will flush this cache before touching the successors of the block.
-  MachineDomTreeNode *NodeMBB = nullptr;
-  if (DT)
-    NodeMBB = DT->getNode(&MBB);
-
-  // Move everything to the new block, except the end_cf pseudo.
-  SplitMBB->splice(SplitMBB->begin(), &MBB, MBB.begin(), MBB.end());
-
-  SplitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
-  MBB.addSuccessor(SplitMBB, BranchProbability::getOne());
-
-  MBB.insert(MBB.end(), NewMI);
-
-  if (DT) {
-    std::vector<MachineDomTreeNode *> Children = NodeMBB->getChildren();
-    DT->addNewBlock(SplitMBB, &MBB);
-
-    // Reparent all of the children to the new block body.
-    auto *SplitNode = DT->getNode(SplitMBB);
-    for (auto *Child : Children)
-      DT->changeImmediateDominator(Child, SplitNode);
-  }
-
-  if (MLI) {
-    if (MachineLoop *Loop = MLI->getLoopFor(&MBB))
-      Loop->addBasicBlockToLoop(SplitMBB, MLI->getBase());
-  }
-
-  if (LIS) {
-    LIS->insertMBBInMaps(SplitMBB);
-    LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
-  }
-
-  // All live-ins are forwarded.
-  for (auto &LiveIn : MBB.liveins())
-    SplitMBB->addLiveIn(LiveIn);
-
-  MI.eraseFromParent();
-  return SplitMBB;
-}
-
 void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
   MachineBasicBlock &MBB = *MI.getParent();
   const DebugLoc &DL = MI.getDebugLoc();
 
   MachineBasicBlock::iterator InsPt = MBB.begin();
+  MachineInstr *NewMI =
+      BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
+          .addReg(AMDGPU::EXEC)
+          .add(MI.getOperand(0));
 
-  // First, move the instruction. It's unnecessarily difficult to update
-  // LiveIntervals when there's a change in control flow, so move the
-  // instruction before changing the blocks.
-  MBB.splice(InsPt, &MBB, MI.getIterator());
   if (LIS)
-    LIS->handleMove(MI);
+    LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
 
-  MachineFunction *MF = MBB.getParent();
+  MI.eraseFromParent();
 
-  // Create instruction without inserting it yet.
-  MachineInstr *NewMI
-    = BuildMI(*MF, DL, TII->get(AMDGPU::S_OR_B64_term), AMDGPU::EXEC)
-    .addReg(AMDGPU::EXEC)
-    .add(MI.getOperand(0));
-  insertInstWithExecFallthrough(MBB, MI, NewMI, DT, LIS, MLI);
+  if (LIS)
+    LIS->handleMove(*NewMI);
 }
 
 // Returns replace operands for a logical operation, either single result
@@ -550,20 +470,17 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
 
   // This doesn't actually need LiveIntervals, but we can preserve them.
   LIS = getAnalysisIfAvailable<LiveIntervals>();
-  DT = getAnalysisIfAvailable<MachineDominatorTree>();
-  MLI = getAnalysisIfAvailable<MachineLoopInfo>();
-
   MRI = &MF.getRegInfo();
 
   MachineFunction::iterator NextBB;
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
        BI != BE; BI = NextBB) {
     NextBB = std::next(BI);
-    MachineBasicBlock *MBB = &*BI;
+    MachineBasicBlock &MBB = *BI;
 
     MachineBasicBlock::iterator I, Next, Last;
 
-    for (I = MBB->begin(), Last = MBB->end(); I != MBB->end(); I = Next) {
+    for (I = MBB.begin(), Last = MBB.end(); I != MBB.end(); I = Next) {
       Next = std::next(I);
       MachineInstr &MI = *I;
 
@@ -584,24 +501,10 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
         emitLoop(MI);
         break;
 
-      case AMDGPU::SI_END_CF: {
-        MachineInstr *NextMI = nullptr;
-
-        if (Next != MBB->end())
-          NextMI = &*Next;
-
+      case AMDGPU::SI_END_CF:
         emitEndCf(MI);
-
-        if (NextMI) {
-          MBB = NextMI->getParent();
-          Next = NextMI->getIterator();
-          Last = MBB->end();
-        }
-
-        NextBB = std::next(MBB->getIterator());
-        BE = MF.end();
         break;
-      }
+
       case AMDGPU::S_AND_B64:
       case AMDGPU::S_OR_B64:
         // Cleanup bit manipulations on exec mask
@@ -615,7 +518,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
       }
 
       // Replay newly inserted code to combine masks
-      Next = (Last == MBB->end()) ? MBB->begin() : Last;
+      Next = (Last == MBB.end()) ? MBB.begin() : Last;
     }
   }
 

diff --git a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -149,12 +149,6 @@ static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
     MI.setDesc(TII.get(AMDGPU::S_XOR_B64));
     return true;
   }
-  case AMDGPU::S_OR_B64_term: {
-    // This is only a terminator to get the correct spill code placement during
-    // register allocation.
-    MI.setDesc(TII.get(AMDGPU::S_OR_B64));
-    return true;
-  }
   case AMDGPU::S_ANDN2_B64_term: {
     // This is only a terminator to get the correct spill code placement during
     // register allocation.

diff --git a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -83,7 +83,7 @@ FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
 }
 
 static bool isEndCF(const MachineInstr& MI, const SIRegisterInfo* TRI) {
-  return MI.getOpcode() == AMDGPU::S_OR_B64_term &&
+  return MI.getOpcode() == AMDGPU::S_OR_B64 &&
          MI.modifiesRegister(AMDGPU::EXEC, TRI);
 }
 
@@ -362,7 +362,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
 
     // Try to collapse adjacent endifs.
     auto E = MBB.end();
-    auto Lead = MBB.getFirstTerminator();
+    auto Lead = skipDebugInstructionsForward(MBB.begin(), E);
     if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI))
       continue;