-
Notifications
You must be signed in to change notification settings - Fork 12.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Fix setting nontemporal in memory legalizer #83815
[AMDGPU] Fix setting nontemporal in memory legalizer #83815
Conversation
|
@llvm/pr-subscribers-backend-amdgpu Author: Mirko Brkušanin (mbrkusanin) ChangesIterator MI can advance in insertWait() but we need original instruction to Patch is 1.84 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83815.diff 24 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 4069a368f68719..73c23f0f987c3d 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -2392,6 +2392,11 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
bool Changed = false;
+ if (IsNonTemporal) {
+ // Set non-temporal hint for all cache levels.
+ Changed |= setTH(MI, AMDGPU::CPol::TH_NT);
+ }
+
if (IsVolatile) {
Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);
@@ -2407,11 +2412,6 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
Position::AFTER);
}
- if (IsNonTemporal) {
- // Set non-temporal hint for all cache levels.
- Changed |= setTH(MI, AMDGPU::CPol::TH_NT);
- }
-
return Changed;
}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
index 77962fadcacfc6..e13542f61474e2 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
@@ -10,6 +10,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
define amdgpu_kernel void @singlethread_acquire_fence() {
; GFX6-LABEL: singlethread_acquire_fence:
@@ -55,6 +57,14 @@ define amdgpu_kernel void @singlethread_acquire_fence() {
; GFX11-CU-LABEL: singlethread_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread") acquire
ret void
@@ -104,6 +114,14 @@ define amdgpu_kernel void @singlethread_release_fence() {
; GFX11-CU-LABEL: singlethread_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread") release
ret void
@@ -153,6 +171,14 @@ define amdgpu_kernel void @singlethread_acq_rel_fence() {
; GFX11-CU-LABEL: singlethread_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread") acq_rel
ret void
@@ -202,6 +228,14 @@ define amdgpu_kernel void @singlethread_seq_cst_fence() {
; GFX11-CU-LABEL: singlethread_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread") seq_cst
ret void
@@ -251,6 +285,14 @@ define amdgpu_kernel void @singlethread_one_as_acquire_fence() {
; GFX11-CU-LABEL: singlethread_one_as_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_one_as_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_one_as_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") acquire
ret void
@@ -300,6 +342,14 @@ define amdgpu_kernel void @singlethread_one_as_release_fence() {
; GFX11-CU-LABEL: singlethread_one_as_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_one_as_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_one_as_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") release
ret void
@@ -349,6 +399,14 @@ define amdgpu_kernel void @singlethread_one_as_acq_rel_fence() {
; GFX11-CU-LABEL: singlethread_one_as_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_one_as_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_one_as_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") acq_rel
ret void
@@ -398,6 +456,14 @@ define amdgpu_kernel void @singlethread_one_as_seq_cst_fence() {
; GFX11-CU-LABEL: singlethread_one_as_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_one_as_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_one_as_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") seq_cst
ret void
@@ -447,6 +513,14 @@ define amdgpu_kernel void @wavefront_acquire_fence() {
; GFX11-CU-LABEL: wavefront_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront") acquire
ret void
@@ -496,6 +570,14 @@ define amdgpu_kernel void @wavefront_release_fence() {
; GFX11-CU-LABEL: wavefront_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront") release
ret void
@@ -545,6 +627,14 @@ define amdgpu_kernel void @wavefront_acq_rel_fence() {
; GFX11-CU-LABEL: wavefront_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront") acq_rel
ret void
@@ -594,6 +684,14 @@ define amdgpu_kernel void @wavefront_seq_cst_fence() {
; GFX11-CU-LABEL: wavefront_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront") seq_cst
ret void
@@ -643,6 +741,14 @@ define amdgpu_kernel void @wavefront_one_as_acquire_fence() {
; GFX11-CU-LABEL: wavefront_one_as_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_one_as_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_one_as_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") acquire
ret void
@@ -692,6 +798,14 @@ define amdgpu_kernel void @wavefront_one_as_release_fence() {
; GFX11-CU-LABEL: wavefront_one_as_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_one_as_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_one_as_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") release
ret void
@@ -741,6 +855,14 @@ define amdgpu_kernel void @wavefront_one_as_acq_rel_fence() {
; GFX11-CU-LABEL: wavefront_one_as_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_one_as_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_one_as_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") acq_rel
ret void
@@ -790,6 +912,14 @@ define amdgpu_kernel void @wavefront_one_as_seq_cst_fence() {
; GFX11-CU-LABEL: wavefront_one_as_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_one_as_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_one_as_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") seq_cst
ret void
@@ -843,6 +973,15 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX11-CU-LABEL: workgroup_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire
ret void
@@ -892,6 +1031,14 @@ define amdgpu_kernel void @workgroup_release_fence() {
; GFX11-CU-LABEL: workgroup_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release
ret void
@@ -945,6 +1092,15 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX11-CU-LABEL: workgroup_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel
ret void
@@ -998,6 +1154,15 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX11-CU-LABEL: workgroup_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst
ret void
@@ -1051,6 +1216,15 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
; GFX11-CU-LABEL: workgroup_one_as_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_one_as_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_one_as_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire
ret void
@@ -1100,6 +1274,14 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
; GFX11-CU-LABEL: workgroup_one_as_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_one_as_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_one_as_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release
ret void
@@ -1153,6 +1335,15 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
; GFX11-CU-LABEL: workgroup_one_as_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel
ret void
@@ -1206,6 +1397,15 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
; GFX11-CU-LABEL: workgroup_one_as_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst
ret void
@@ -1269,6 +1469,16 @@ define amdgpu_kernel void @agent_acquire_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: agent_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: agent_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") acquire
ret void
@@ -1320,6 +1530,14 @@ define amdgpu_kernel void @agent_release_fence() {
; GFX11-CU-LABEL: agent_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: agent_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: agent_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") release
ret void
@@ -1385,6 +1603,16 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: agent_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: agent_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") acq_rel
ret void
@@ -1450,6 +1678,16 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: agent_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: agent_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") seq_cst
ret void
@@ -1513,6 +1751,16 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: agent_one_as_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: agent_one_as_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acquire
ret void
@@ -1564,6 +1812,14 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
; GFX11-CU-LABEL: agent_one_as_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: agent_one_as_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: agent_one_as_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") release
ret void
@@ -1629,6 +1885,16 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: agent_one_as_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: agent_one_as_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acq_rel
ret void
@@ -1694,6 +1960,16 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: agent_one_as_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: agent_one_as_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") seq_cst
ret void
@@ -1759,6 +2035,16 @@ define amdgpu_kernel void @system_acquire_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: system_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: system_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-CU-NEXT: s_endpgm
entry:
fence acquire
ret void
@@ -1812,6 +2098,14 @@ define amdgpu_kernel void @system_release_fence() {
; GFX11-CU-LABEL: system_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: system_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: system_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence release
ret void
@@ -1881,6 +2175,16 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: system_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: system_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-CU-NEXT: s_endpgm
entry:
fence acq_rel
ret void
@@ -1950,6 +2254,16 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: system_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: system_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-CU-NEXT: s_endpgm
e...
[truncated]
|
Iterator MI can advance in insertWait() but we need original instruction to set temporal hint. Just move it before handling volatile.
c5f3f7c
to
febe0d9
Compare
Rebase |
I wonder if this is worth backporting to the LLVM 18 release branch. |
Iterator MI can advance in insertWait() but we need original instruction to set temporal hint. Just move it before handling volatile.
Iterator MI can advance in insertWait() but we need original instruction to
set temporal hint. Just move it before handling volatile.