From 7678e6e562811688b472ad19900fa64cd00b7c06 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 21 Mar 2024 08:14:48 -0700 Subject: [PATCH] [RISCV] Lower the alignment requirement for a GPR pair spill for Zdinx on RV32. (#85871) I believe we can use XLen alignment as long as eliminateFrameIndex limits the maximum folded offset to 2043. This way when we split the load/store into two instructions we'll be able to add 4 without overflowing simm12. --- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 7 ++ llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 2 +- llvm/test/CodeGen/RISCV/zdinx-large-spill.mir | 74 +++++++++++++++++++ 3 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/RISCV/zdinx-large-spill.mir diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 952d17468da59..74d65324b95d8 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -446,6 +446,13 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, (Lo12 & 0b11111) != 0) { // Prefetch instructions require the offset to be 32 byte aligned. MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0); + } else if ((Opc == RISCV::PseudoRV32ZdinxLD || + Opc == RISCV::PseudoRV32ZdinxSD) && + Lo12 >= 2044) { + // This instruction will be split into 2 instructions. The second + // instruction will add 4 to the immediate. If that would overflow 12 + // bits, we can't fold the offset. + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0); } else { // We can encode an add with 12 bit signed immediate in the immediate // operand of our user instruction. 
As a result, the remaining diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 225b57554c1dc..9da1f73681c68 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -573,7 +573,7 @@ let RegAltNameIndices = [ABIRegAltName] in { } let RegInfos = RegInfoByHwMode<[RV32, RV64], - [RegInfo<64, 64, 64>, RegInfo<128, 128, 128>]>, + [RegInfo<64, 64, 32>, RegInfo<128, 128, 64>]>, DecoderMethod = "DecodeGPRPairRegisterClass" in def GPRPair : RegisterClass<"RISCV", [XLenPairFVT], 64, (add X10_X11, X12_X13, X14_X15, X16_X17, diff --git a/llvm/test/CodeGen/RISCV/zdinx-large-spill.mir b/llvm/test/CodeGen/RISCV/zdinx-large-spill.mir new file mode 100644 index 0000000000000..a2a722a7f7282 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/zdinx-large-spill.mir @@ -0,0 +1,74 @@ +# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +# RUN: llc %s -mtriple=riscv32 -mattr=+zdinx -start-before=prologepilog -o - | FileCheck %s + +# We want to make sure eliminateFrameIndex doesn't fold sp+2044 as an offset in +# a GPR pair spill/reload instruction. When we split the pair spill, we would be +# unable to add 4 to the immediate without overflowing simm12. 
+ +--- | + define void @foo() { + ; CHECK-LABEL: foo: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi sp, sp, -2048 + ; CHECK-NEXT: addi sp, sp, -16 + ; CHECK-NEXT: .cfi_def_cfa_offset 2064 + ; CHECK-NEXT: lui t0, 1 + ; CHECK-NEXT: add t0, sp, t0 + ; CHECK-NEXT: sw a0, -2040(t0) + ; CHECK-NEXT: sw a1, -2036(t0) + ; CHECK-NEXT: lui a0, 1 + ; CHECK-NEXT: add a0, sp, a0 + ; CHECK-NEXT: sw a2, -2048(a0) + ; CHECK-NEXT: sw a3, -2044(a0) + ; CHECK-NEXT: sw a4, 2040(sp) + ; CHECK-NEXT: sw a5, 2044(sp) + ; CHECK-NEXT: sw a6, 2032(sp) + ; CHECK-NEXT: sw a7, 2036(sp) + ; CHECK-NEXT: lui a0, 1 + ; CHECK-NEXT: add a0, sp, a0 + ; CHECK-NEXT: lw a1, -2036(a0) + ; CHECK-NEXT: lw a0, -2040(a0) + ; CHECK-NEXT: lui a0, 1 + ; CHECK-NEXT: add a0, sp, a0 + ; CHECK-NEXT: lw a2, -2048(a0) + ; CHECK-NEXT: lw a3, -2044(a0) + ; CHECK-NEXT: lw a4, 2040(sp) + ; CHECK-NEXT: lw a5, 2044(sp) + ; CHECK-NEXT: lw a6, 2032(sp) + ; CHECK-NEXT: lw a7, 2036(sp) + ; CHECK-NEXT: addi sp, sp, 2032 + ; CHECK-NEXT: addi sp, sp, 32 + ; CHECK-NEXT: ret + ret void + } +... 
+--- +name: foo +tracksRegLiveness: true +tracksDebugUserValues: true +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4 } + - { id: 1, type: spill-slot, size: 8, alignment: 4 } + - { id: 2, type: spill-slot, size: 8, alignment: 4 } + - { id: 3, type: spill-slot, size: 8, alignment: 4 } + - { id: 4, type: spill-slot, size: 2024, alignment: 4 } +machineFunctionInfo: + varArgsFrameIndex: 0 + varArgsSaveSize: 0 +body: | + bb.0: + liveins: $x10_x11, $x12_x13, $x14_x15, $x16_x17 + + PseudoRV32ZdinxSD killed renamable $x10_x11, %stack.0, 0 :: (store (s64) into %stack.0, align 4) + PseudoRV32ZdinxSD killed renamable $x12_x13, %stack.1, 0 :: (store (s64) into %stack.1, align 4) + PseudoRV32ZdinxSD killed renamable $x14_x15, %stack.2, 0 :: (store (s64) into %stack.2, align 4) + PseudoRV32ZdinxSD killed renamable $x16_x17, %stack.3, 0 :: (store (s64) into %stack.3, align 4) + renamable $x10_x11 = PseudoRV32ZdinxLD %stack.0, 0 :: (load (s64) from %stack.0, align 4) + renamable $x12_x13 = PseudoRV32ZdinxLD %stack.1, 0 :: (load (s64) from %stack.1, align 4) + renamable $x14_x15 = PseudoRV32ZdinxLD %stack.2, 0 :: (load (s64) from %stack.2, align 4) + renamable $x16_x17 = PseudoRV32ZdinxLD %stack.3, 0 :: (load (s64) from %stack.3, align 4) + PseudoRET + +...