From 93c1ed53fcbdb1ed4d8695f3a3e184005dd3126f Mon Sep 17 00:00:00 2001 From: Noah Huetter Date: Mon, 21 Jun 2021 12:01:55 +0200 Subject: [PATCH] ssr: Add barrier instructions that polls for done bit (#11) --- README.md | 8 ++++ clang/include/clang/Basic/BuiltinsRISCV.def | 1 + llvm/include/llvm/IR/IntrinsicsRISCV.td | 3 ++ llvm/lib/Target/RISCV/RISCVExpandSSRInsts.cpp | 48 +++++++++++++++++++ llvm/lib/Target/RISCV/RISCVInstrInfoXssr.td | 10 ++++ .../CodeGen/RISCV/ssr-pseudo-instructions.mir | 24 ++++++++++ 6 files changed, 94 insertions(+) diff --git a/README.md b/README.md index d4273cbf602cfeb..ec422d6fe69b29b 100644 --- a/README.md +++ b/README.md @@ -149,6 +149,14 @@ void __builtin_ssr_setup_bound_stride_3d(uint32_t DM, uint32_t b, uint32_t s); * @param s relative stride */ void __builtin_ssr_setup_bound_stride_4d(uint32_t DM, uint32_t b, uint32_t s); + +/** + * @brief Wait for the done bit to be set on data mover `DM` + * @details Creates a polling loop that might never exit if the SSR is not configured correctly + * + * @param DM data mover ID + */ +void __builtin_ssr_barrier(uint32_t DM); ``` ### SDMA diff --git a/clang/include/clang/Basic/BuiltinsRISCV.def b/clang/include/clang/Basic/BuiltinsRISCV.def index 510e6f14bd50d65..a75cbacf567e211 100644 --- a/clang/include/clang/Basic/BuiltinsRISCV.def +++ b/clang/include/clang/Basic/BuiltinsRISCV.def @@ -39,6 +39,7 @@ SSR_BUILTIN(push, "vUid", "n", "xssr") SSR_BUILTIN(pop, "dUi", "n", "xssr") SSR_BUILTIN(enable, "v", "n", "xssr") SSR_BUILTIN(disable, "v", "n", "xssr") +SSR_BUILTIN(barrier, "vUi", "n", "xssr") // SDMA builtins diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index ae9f366507165ff..4068715dde97734 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -1167,6 +1167,9 @@ let TargetPrefix = "riscv" in { [llvm_i32_ty, llvm_i32_ty], [IntrHasSideEffects, ImmArg>]>, RISCVSSRIntrinsic; + def int_riscv_ssr_barrier + : 
GCCBuiltin<"__builtin_ssr_barrier">, + Intrinsic<[], [llvm_i32_ty], [IntrHasSideEffects, ImmArg>]>, RISCVSSRIntrinsic; } // TargetPrefix = "riscv" diff --git a/llvm/lib/Target/RISCV/RISCVExpandSSRInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandSSRInsts.cpp index 91a23ca4bb70984..c5115c699cfdaaa 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandSSRInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandSSRInsts.cpp @@ -80,6 +80,9 @@ class RISCVExpandSSR : public MachineFunctionPass { MachineBasicBlock::iterator MBBI); bool expandSSR_SetupRep(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); + bool expandSSR_Barrier(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); RISCVExpandSSR::RegisterMergingPreferences gatherRegisterMergingPreferences(); }; @@ -165,6 +168,8 @@ bool RISCVExpandSSR::expandMI(MachineBasicBlock &MBB, return expandSSR_EnDis(MBB, MBBI); case RISCV::PseudoSSRSetupRepetition: return expandSSR_SetupRep(MBB, MBBI); + case RISCV::PseudoSSRBarrier: + return expandSSR_Barrier(MBB, MBBI, NextMBBI); } // Prevent excessive live-ins, they pose a problem with multiple SSR regions @@ -347,6 +352,49 @@ bool RISCVExpandSSR::expandSSR_EnDis(MachineBasicBlock &MBB, return true; } +bool RISCVExpandSSR::expandSSR_Barrier(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + DebugLoc DL = MBBI->getDebugLoc(); + MachineInstr &MI = *MBBI; + MachineFunction *MF = MBB.getParent(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + + unsigned streamer = (unsigned)MBBI->getOperand(0).getImm(); + + LLVM_DEBUG(dbgs() << "-- Expanding SSR barrier on DM" << streamer << "\n"); + + auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + // Insert new MBBs. 
+ MF->insert(++MBB.getIterator(), LoopMBB); + MF->insert(++LoopMBB->getIterator(), DoneMBB); + + // Set up successors and transfer remaining instructions to DoneMBB. + LoopMBB->addSuccessor(LoopMBB); + LoopMBB->addSuccessor(DoneMBB); + DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); + DoneMBB->transferSuccessorsAndUpdatePHIs(&MBB); + MBB.addSuccessor(LoopMBB); + + // build loop: %0 = scfgri 0 | DM; srli %0, %0, 31; beq %0, zero, loop + Register R = MRI.createVirtualRegister(&RISCV::GPRRegClass); + BuildMI(LoopMBB, DL, TII->get(RISCV::SCFGRI), R).addImm(streamer); + Register Rs = MRI.createVirtualRegister(&RISCV::GPRRegClass); + BuildMI(LoopMBB, DL, TII->get(RISCV::SRLI), Rs).addReg(R, RegState::Kill).addImm(31); + BuildMI(LoopMBB, DL, TII->get(RISCV::BEQ)).addReg(Rs, RegState::Kill).addReg(RISCV::X0).addMBB(LoopMBB); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *LoopMBB); + computeAndAddLiveIns(LiveRegs, *DoneMBB); + + return true; +} + void RISCVExpandSSR::mergePushPop(MachineBasicBlock &MBB) { SmallSet virtRegs[NUM_SSR]; const TargetRegisterInfo *TRI = MBB.getParent()->getRegInfo().getTargetRegisterInfo(); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXssr.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXssr.td index a8e2a74f6523e8e..1effed3387d1e18 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXssr.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXssr.td @@ -129,6 +129,12 @@ class SPseudoEnDis: let usesCustomInserter = 0; } +class SPseudoBarrier: + Pseudo<(outs), (ins uimm5:$ssr),[]> { + let hasSideEffects = 1; + let usesCustomInserter = 0; +} + let Predicates = [HasExtXssr] in { def PseudoSSRSetup_1D_R : SPseudoSetup1D; def PseudoSSRSetup_1D_W : SPseudoSetup1D; @@ -144,6 +150,7 @@ let Predicates = [HasExtXssr] in { def PseudoSSRRead : SPseudoRW; def PseudoSSRWrite : SPseudoRW; def PseudoSSRSetupRepetition : SPseudoSetupRepetition; + def PseudoSSRBarrier : SPseudoBarrier; // pattern matching on 
intrinsic and resulting in pseudo instruction def : Pat<(int_riscv_ssr_setup_1d_r timm:$ssr, GPR:$rep, GPR:$bound, GPR:$stride, GPR:$ptr), @@ -176,4 +183,7 @@ let Predicates = [HasExtXssr] in { def : Pat<(int_riscv_ssr_enable), (PseudoSSREnable)>; def : Pat<(int_riscv_ssr_disable), (PseudoSSRDisable)>; + def : Pat<(int_riscv_ssr_barrier timm:$ssr), + (PseudoSSRBarrier timm:$ssr)>; + } // Predicates = [HasExtXssr] diff --git a/llvm/test/CodeGen/RISCV/ssr-pseudo-instructions.mir b/llvm/test/CodeGen/RISCV/ssr-pseudo-instructions.mir index f2bfb3830cebd6c..978c55833cb7a3d 100644 --- a/llvm/test/CodeGen/RISCV/ssr-pseudo-instructions.mir +++ b/llvm/test/CodeGen/RISCV/ssr-pseudo-instructions.mir @@ -8,6 +8,7 @@ define i32 @outline_3(i32 %a, i32 %b) { ret i32 0 } define i32 @outline_4(i32 %a, i32 %b) { ret i32 0 } define i32 @outline_5(i32 %a, i32 %b) { ret i32 0 } + define i32 @outline_6(i32 %a, i32 %b) { ret i32 0 } ... --- name: outline_0 @@ -96,3 +97,26 @@ body: | PseudoSSRDisable PseudoRET ... +--- +name: outline_6 +tracksRegLiveness: true +body: | + bb.0: + liveins: + + ; RV32-SSR: bb.1 + ; RV32-SSR: %0:gpr = SCFGRI 0 + ; RV32-SSR-NEXT: %1:gpr = SRLI killed %0, 31 + ; RV32-SSR-NEXT: BEQ killed %1, $x0, %bb.1 + + PseudoSSRBarrier 0 + + ; RV32-SSR: bb.3 + ; RV32-SSR: %2:gpr = SCFGRI 1 + ; RV32-SSR-NEXT: %3:gpr = SRLI killed %2, 31 + ; RV32-SSR-NEXT: BEQ killed %3, $x0, %bb.3 + + PseudoSSRBarrier 1 + + PseudoRET +...