Skip to content

Commit

Permalink
[AMDGPU][SILowerSGPRSpills] Insert individual kill instructions
Browse files Browse the repository at this point in the history
Liveness extension of the virtual VGPRs used for SGPR spilling
currently inserts a single KILL instruction in the epilogue
blocks, marking all of them collectively. This prevents their
live ranges from being split during allocation and leads to
allocation failure when the allocator runs out of allocatable registers.

This patch introduces a separate KILL instruction for each VGPR
involved so that their live ranges can be split successfully.

Change-Id: I0e413177c0039d9606e6f88e7c83c7a283a3b3fb
  • Loading branch information
cdevadas committed Apr 6, 2023
1 parent 249046b commit 5fe166b
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 25 deletions.
16 changes: 8 additions & 8 deletions llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,18 +285,18 @@ void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
}

// Insert the KILL in the return blocks to extend their liveness until the
// end of function.
// end of function. Insert a separate KILL for each VGPR.
for (MachineBasicBlock *RestoreBlock : RestoreBlocks) {
MachineBasicBlock::iterator InsertBefore =
RestoreBlock->getFirstTerminator();
auto MIB =
BuildMI(*RestoreBlock, *InsertBefore, InsertBefore->getDebugLoc(),
TII->get(TargetOpcode::KILL));
for (auto Reg : MFI->getSGPRSpillVGPRs())
for (auto Reg : MFI->getSGPRSpillVGPRs()) {
auto MIB =
BuildMI(*RestoreBlock, *InsertBefore, InsertBefore->getDebugLoc(),
TII->get(TargetOpcode::KILL));
MIB.addReg(Reg);

if (LIS)
LIS->InsertMachineInstrInMaps(*MIB);
if (LIS)
LIS->InsertMachineInstrInMaps(*MIB);
}
}
}

Expand Down
29 changes: 17 additions & 12 deletions llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
Original file line number Diff line number Diff line change
Expand Up @@ -421,15 +421,17 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out,
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: .LBB0_2: ; %ret
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
; GCN-NEXT: buffer_load_dword v0, off, s[92:95], 0 offset:12 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v0, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
; GCN-NEXT: buffer_load_dword v1, off, s[92:95], 0 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
; GCN-NEXT: buffer_load_dword v2, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v2, off, s[92:95], 0 offset:12 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: ; kill: killed $vgpr0 killed $vgpr1 killed $vgpr2
; GCN-NEXT: ; kill: killed $vgpr2
; GCN-NEXT: ; kill: killed $vgpr1
; GCN-NEXT: ; kill: killed $vgpr0
; GCN-NEXT: s_endpgm
%wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
%wide.sgpr1 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
Expand Down Expand Up @@ -693,12 +695,13 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: .LBB1_2: ; %ret
; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[28:29]
; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[28:29]
; GCN-NEXT: ; kill: killed $vgpr0 killed $vgpr1
; GCN-NEXT: ; kill: killed $vgpr1
; GCN-NEXT: ; kill: killed $vgpr0
; GCN-NEXT: s_endpgm
%wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
%wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
Expand Down Expand Up @@ -940,12 +943,13 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: .LBB2_2: ; %ret
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: ; kill: killed $vgpr0 killed $vgpr1
; GCN-NEXT: ; kill: killed $vgpr1
; GCN-NEXT: ; kill: killed $vgpr0
; GCN-NEXT: s_endpgm
call void asm sideeffect "", "~{v[0:7]}" () #0
call void asm sideeffect "", "~{v[8:15]}" () #0
Expand Down Expand Up @@ -1196,12 +1200,13 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: .LBB3_2: ; %ret
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: ; kill: killed $vgpr0 killed $vgpr1
; GCN-NEXT: ; kill: killed $vgpr1
; GCN-NEXT: ; kill: killed $vgpr0
; GCN-NEXT: s_endpgm
call void asm sideeffect "", "~{v[0:7]}" () #0
call void asm sideeffect "", "~{v[8:15]}" () #0
Expand Down
6 changes: 4 additions & 2 deletions llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ define amdgpu_kernel void @kernel0(i32 addrspace(1)* %out, i32 %in) #1 {
; CHECK-NEXT: v_writelane_b32 v0, s15, 47
; CHECK-NEXT: s_cbranch_scc0 .LBB0_2
; CHECK-NEXT: ; %bb.1: ; %ret
; CHECK-NEXT: ; kill: killed $vgpr23 killed $vgpr0
; CHECK-NEXT: ; kill: killed $vgpr23
; CHECK-NEXT: ; kill: killed $vgpr0
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: .LBB0_2: ; %bb0
; CHECK-NEXT: v_readlane_b32 s0, v23, 0
Expand Down Expand Up @@ -372,7 +373,8 @@ define amdgpu_kernel void @kernel0(i32 addrspace(1)* %out, i32 %in) #1 {
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[0:15]
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ; kill: killed $vgpr23 killed $vgpr0
; CHECK-NEXT: ; kill: killed $vgpr23
; CHECK-NEXT: ; kill: killed $vgpr0
; CHECK-NEXT: s_endpgm
call void asm sideeffect "", "~{v[0:7]}" () #0
call void asm sideeffect "", "~{v[8:15]}" () #0
Expand Down
7 changes: 4 additions & 3 deletions llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -214,12 +214,13 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(i32 addrspace(1)* %o
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: .LBB0_2: ; %ret
; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[24:25]
; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[24:25]
; GCN-NEXT: ; kill: killed $vgpr0 killed $vgpr1
; GCN-NEXT: ; kill: killed $vgpr1
; GCN-NEXT: ; kill: killed $vgpr0
; GCN-NEXT: s_endpgm
call void asm sideeffect "", "~{v[0:7]}" () #0
call void asm sideeffect "", "~{v[8:15]}" () #0
Expand Down

0 comments on commit 5fe166b

Please sign in to comment.