Skip to content

Commit

Permalink
[AArch64] Optimize add/sub with immediate through MIPeepholeOpt
Browse files Browse the repository at this point in the history
Fixes the build issue with D111034, whose goal was to optimize
add/sub with long immediates.

Optimize ([add|sub] r, imm) -> ([ADD|SUB] ([ADD|SUB] r, #imm0, lsl #12), #imm1),
if imm == (imm0<<12)+imm1. and both imm0 and imm1 are non-zero 12-bit unsigned
integers.

Optimize ([add|sub] r, imm) -> ([SUB|ADD] ([SUB|ADD] r, #imm0, lsl #12), #imm1),
if imm == -(imm0<<12)-imm1, and both imm0 and imm1 are non-zero 12-bit unsigned
integers.

The change which fixed the build issue in D111034 was the use of new virtual
registers so that SSA form is maintained until deleting MI.

Differential Revision: https://reviews.llvm.org/D117429
  • Loading branch information
red1bluelost authored and davemgreen committed Jan 22, 2022
1 parent 4041354 commit 93deac2
Show file tree
Hide file tree
Showing 4 changed files with 302 additions and 66 deletions.
204 changes: 171 additions & 33 deletions llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,19 @@
// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
// MOVi64imm + ANDXrr ==> ANDXri + ANDXri
//
// 2. MOVi32imm + ADDWrr ==> ADDWRi + ADDWRi
// MOVi64imm + ADDXrr ==> ANDXri + ANDXri
//
// 3. MOVi32imm + SUBWrr ==> SUBWRi + SUBWRi
// MOVi64imm + SUBXrr ==> SUBXri + SUBXri
//
// The mov pseudo instruction could be expanded to multiple mov instructions
// later. In this case, we could try to split the constant operand of mov
// instruction into two bitmask immediates. It makes two AND instructions
// intead of multiple `mov` + `and` instructions.
// instruction into two immediates which can be directly encoded into
// *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
// multiple `mov` + `and/add/sub` instructions.
//
// 2. Remove redundant ORRWrs which is generated by zero-extend.
// 4. Remove redundant ORRWrs which is generated by zero-extend.
//
// %3:gpr32 = ORRWrs $wzr, %2, 0
// %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
Expand Down Expand Up @@ -51,6 +58,12 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
MachineLoopInfo *MLI;
MachineRegisterInfo *MRI;

bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
MachineInstr *&SubregToRegMI);

template <typename T>
bool visitADDSUB(MachineInstr &MI,
SmallSetVector<MachineInstr *, 8> &ToBeRemoved, bool IsAdd);
template <typename T>
bool visitAND(MachineInstr &MI,
SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
Expand Down Expand Up @@ -131,36 +144,9 @@ bool AArch64MIPeepholeOpt::visitAND(
assert((RegSize == 32 || RegSize == 64) &&
"Invalid RegSize for AND bitmask peephole optimization");

// Check whether AND's MBB is in loop and the AND is loop invariant.
MachineBasicBlock *MBB = MI.getParent();
MachineLoop *L = MLI->getLoopFor(MBB);
if (L && !L->isLoopInvariant(MI))
return false;

// Check whether AND's operand is MOV with immediate.
MachineInstr *MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
if (!MovMI)
return false;

MachineInstr *SubregToRegMI = nullptr;
// If it is SUBREG_TO_REG, check its operand.
if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
SubregToRegMI = MovMI;
MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
if (!MovMI)
return false;
}

if (MovMI->getOpcode() != AArch64::MOVi32imm &&
MovMI->getOpcode() != AArch64::MOVi64imm)
return false;

// If the MOV has multiple uses, do not split the immediate because it causes
// more instructions.
if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
return false;

if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
// Perform several essential checks against current MI.
MachineInstr *MovMI = nullptr, *SubregToRegMI = nullptr;
if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
return false;

// Split the bitmask immediate into two.
Expand All @@ -177,6 +163,7 @@ bool AArch64MIPeepholeOpt::visitAND(

// Create new AND MIs.
DebugLoc DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();
const TargetRegisterClass *ANDImmRC =
(RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
Register DstReg = MI.getOperand(0).getReg();
Expand Down Expand Up @@ -251,6 +238,145 @@ bool AArch64MIPeepholeOpt::visitORR(
return true;
}

template <typename T>
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
// The immediate must be in the form of ((imm0 << 12) + imm1), in which both
// imm0 and imm1 are non-zero 12-bit unsigned int.
if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
(Imm & ~static_cast<T>(0xffffff)) != 0)
return false;

// The immediate can not be composed via a single instruction.
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
if (Insn.size() == 1)
return false;

// Split Imm into (Imm0 << 12) + Imm1;
Imm0 = (Imm >> 12) & 0xfff;
Imm1 = Imm & 0xfff;
return true;
}

template <typename T>
bool AArch64MIPeepholeOpt::visitADDSUB(
MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
bool IsAdd) {
// Try below transformation.
//
// MOVi32imm + ADDWrr ==> ADDWri + ADDWri
// MOVi64imm + ADDXrr ==> ADDXri + ADDXri
//
// MOVi32imm + SUBWrr ==> SUBWri + SUBWri
// MOVi64imm + SUBXrr ==> SUBXri + SUBXri
//
// The mov pseudo instruction could be expanded to multiple mov instructions
// later. Let's try to split the constant operand of mov instruction into two
// legal add/sub immediates. It makes only two ADD/SUB instructions intead of
// multiple `mov` + `and/sub` instructions.

unsigned RegSize = sizeof(T) * 8;
assert((RegSize == 32 || RegSize == 64) &&
"Invalid RegSize for legal add/sub immediate peephole optimization");

// Perform several essential checks against current MI.
MachineInstr *MovMI, *SubregToRegMI;
if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
return false;

// Split the immediate to Imm0 and Imm1, and calculate the Opcode.
T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
unsigned Opcode;
if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) {
if (IsAdd)
Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
else
Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
} else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) {
if (IsAdd)
Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
else
Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
} else {
return false;
}

// Create new ADD/SUB MIs.
DebugLoc DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();
const TargetRegisterClass *RC =
(RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
Register NewTmpReg = MRI->createVirtualRegister(RC);
Register NewDstReg = MRI->createVirtualRegister(RC);

MRI->constrainRegClass(SrcReg, RC);
BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
.addReg(SrcReg)
.addImm(Imm0)
.addImm(12);

MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
.addReg(NewTmpReg)
.addImm(Imm1)
.addImm(0);

MRI->replaceRegWith(DstReg, NewDstReg);
// replaceRegWith changes MI's definition register. Keep it for SSA form until
// deleting MI.
MI.getOperand(0).setReg(DstReg);

// Record the MIs need to be removed.
ToBeRemoved.insert(&MI);
if (SubregToRegMI)
ToBeRemoved.insert(SubregToRegMI);
ToBeRemoved.insert(MovMI);

return true;
}

// Checks if the corresponding MOV immediate instruction is applicable for
// this peephole optimization.
bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
MachineInstr *&MovMI,
MachineInstr *&SubregToRegMI) {
// Check whether current MBB is in loop and the AND is loop invariant.
MachineBasicBlock *MBB = MI.getParent();
MachineLoop *L = MLI->getLoopFor(MBB);
if (L && !L->isLoopInvariant(MI))
return false;

// Check whether current MI's operand is MOV with immediate.
MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
if (!MovMI)
return false;

// If it is SUBREG_TO_REG, check its operand.
SubregToRegMI = nullptr;
if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
SubregToRegMI = MovMI;
MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
if (!MovMI)
return false;
}

if (MovMI->getOpcode() != AArch64::MOVi32imm &&
MovMI->getOpcode() != AArch64::MOVi64imm)
return false;

// If the MOV has multiple uses, do not split the immediate because it causes
// more instructions.
if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
return false;
if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
return false;

// It is OK to perform this peephole optimization.
return true;
}

bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
Expand Down Expand Up @@ -278,6 +404,18 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
case AArch64::ORRWrs:
Changed = visitORR(MI, ToBeRemoved);
break;
case AArch64::ADDWrr:
Changed = visitADDSUB<uint32_t>(MI, ToBeRemoved, true);
break;
case AArch64::SUBWrr:
Changed = visitADDSUB<uint32_t>(MI, ToBeRemoved, false);
break;
case AArch64::ADDXrr:
Changed = visitADDSUB<uint64_t>(MI, ToBeRemoved, true);
break;
case AArch64::SUBXrr:
Changed = visitADDSUB<uint64_t>(MI, ToBeRemoved, false);
break;
}
}
}
Expand Down
63 changes: 63 additions & 0 deletions llvm/test/CodeGen/AArch64/addsub-24bit-imm.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s

# Main intention is to verify machine instructions have valid register classes.
# Use of UBFM[W|X]ri is used as an arbitrary instruction that requires GPR[32|64]RegClass.
# If the ADD/SUB optimization generates invalid register classes, this test will fail.
---
name: addi
body: |
bb.0.entry:
liveins: $w0
; CHECK-LABEL: name: addi
; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY]], 273, 12
; CHECK-NEXT: [[ADDWri1:%[0-9]+]]:gpr32common = ADDWri [[ADDWri]], 3549, 0
; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[ADDWri1]], 28, 31
; CHECK-NEXT: $w0 = COPY [[UBFMWri]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr32 = COPY $w0
%1:gpr32 = MOVi32imm 1121757
%2:gpr32 = ADDWrr %0, %1
%3:gpr32 = UBFMWri %2, 28, 31
$w0 = COPY %3
RET_ReallyLR implicit $w0
...
---
name: addl
body: |
bb.0.entry:
liveins: $x0
; CHECK-LABEL: name: addl
; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri [[COPY]], 273, 12
; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri [[ADDXri]], 3549, 0
; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[ADDXri1]], 28, 31
; CHECK-NEXT: $x0 = COPY [[UBFMXri]]
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:gpr64 = COPY $x0
%1:gpr32 = MOVi32imm 1121757
%2:gpr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32
%3:gpr64 = ADDXrr %0, killed %2
%4:gpr64 = UBFMXri %3, 28, 31
$x0 = COPY %4
RET_ReallyLR implicit $x0
...
---
name: addl_negate
body: |
bb.0.entry:
liveins: $x0
; CHECK-LABEL: name: addl_negate
; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
; CHECK-NEXT: [[SUBXri:%[0-9]+]]:gpr64sp = SUBXri [[COPY]], 273, 12
; CHECK-NEXT: [[SUBXri1:%[0-9]+]]:gpr64common = SUBXri [[SUBXri]], 3549, 0
; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBXri1]], 28, 31
; CHECK-NEXT: $x0 = COPY [[UBFMXri]]
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:gpr64 = COPY $x0
%1:gpr64 = MOVi64imm -1121757
%2:gpr64 = ADDXrr %0, killed %1
%3:gpr64 = UBFMXri %2, 28, 31
$x0 = COPY %3
RET_ReallyLR implicit $x0
Loading

0 comments on commit 93deac2

Please sign in to comment.