diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index eeb7f64aa5810e..7f6674934aae31 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -2781,10 +2781,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, AFI->setLRIsSpilled(SavedRegs.test(ARM::LR)); } -void ARMFrameLowering::processFunctionBeforeFrameFinalized( - MachineFunction &MF, RegScavenger *RS) const { - TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS); - +void ARMFrameLowering::updateLRRestored(MachineFunction &MF) const { MachineFrameInfo &MFI = MF.getFrameInfo(); if (!MFI.isCalleeSavedInfoValid()) return; @@ -2808,6 +2805,12 @@ void ARMFrameLowering::processFunctionBeforeFrameFinalized( } } +void ARMFrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS); + updateLRRestored(MF); +} + void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const { TargetFrameLowering::getCalleeSaves(MF, SavedRegs); diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h index 8d2b8beb9a58fb..13c48f5dc61743 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -59,6 +59,7 @@ class ARMFrameLowering : public TargetFrameLowering { void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; + void updateLRRestored(MachineFunction &MF) const; void processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS = nullptr) const override; diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index ed9d30c3c3ab90..5716e022e6711d 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -2062,17 +2062,6 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { MO.setReg(ARM::PC); PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI); MBB.erase(MBBI); - // We now restore LR into PC so it is not live-out of the return block - // anymore: Clear the CSI Restored bit. - MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo(); - // CSI should be fixed after PrologEpilog Insertion - assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid"); - for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) { - if (Info.getReg() == ARM::LR) { - Info.setRestored(false); - break; - } - } return true; } } @@ -2120,14 +2109,22 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { isThumb2 = AFI->isThumb2Function(); isThumb1 = AFI->isThumbFunction() && !isThumb2; - bool Modified = false; + bool Modified = false, ModifiedLDMReturn = false; for (MachineBasicBlock &MBB : Fn) { Modified |= LoadStoreMultipleOpti(MBB); if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress()) - Modified |= MergeReturnIntoLDM(MBB); + ModifiedLDMReturn |= MergeReturnIntoLDM(MBB); if (isThumb1) Modified |= CombineMovBx(MBB); } + Modified |= ModifiedLDMReturn; + + // If we merged a BX instruction into an LDM, we need to re-calculate whether + // LR is restored. This check needs to consider the whole function, not just + // the instruction(s) we changed, because there may be other BX returns which + // still need LR to be restored. + if (ModifiedLDMReturn) + Fn.getSubtarget().getFrameLowering()->updateLRRestored(Fn); Allocator.DestroyAll(); return Modified; diff --git a/llvm/test/CodeGen/ARM/ldst-opt-lr-restored.ll b/llvm/test/CodeGen/ARM/ldst-opt-lr-restored.ll index 883a812139ded1..9494880f990258 100644 --- a/llvm/test/CodeGen/ARM/ldst-opt-lr-restored.ll +++ b/llvm/test/CodeGen/ARM/ldst-opt-lr-restored.ll @@ -10,16 +10,17 @@ define i32 @foo(ptr %ctx) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cbz r0, .LBB0_2 ; CHECK-NEXT: @ %bb.1: @ %if.end +; CHECK-NEXT: movw r12, :lower16:val2 +; CHECK-NEXT: movw r3, :lower16:val1 ; CHECK-NEXT: movw r2, :lower16:val0 ; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: movw r12, :lower16:val2 -; CHECK-NEXT: movw r3, :lower16:val1 -; CHECK-NEXT: movt r2, :upper16:val0 -; CHECK-NEXT: add.w lr, r1, #4 ; CHECK-NEXT: movt r12, :upper16:val2 ; CHECK-NEXT: movt r3, :upper16:val1 -; CHECK-NEXT: stm.w lr, {r2, r3, r12} +; CHECK-NEXT: movt r2, :upper16:val0 +; CHECK-NEXT: str r2, [r1, #4] +; CHECK-NEXT: str r3, [r1, #8] +; CHECK-NEXT: str.w r12, [r1, #12] ; CHECK-NEXT: str r0, [r1, #16] ; CHECK-NEXT: bx lr ; CHECK-NEXT: .LBB0_2: @ %if.then