From 07ed8187acc31ac3f4779da452864a29d48799ac Mon Sep 17 00:00:00 2001 From: Alex MacLean Date: Fri, 24 Jan 2025 16:56:10 -0800 Subject: [PATCH] [OpenMP] Replace nvvm.annotation usage with kernel calling conventions (#122320) Specifying a kernel with the `ptx_kernel` or `amdgpu_kernel` calling convention is a more idiomatic and compile-time performant than using the `nvvm.annoation !"kernel"` metadata. Transition OMPIRBuilder to use calling conventions for PTX kernels and no longer emit `nvvm.annoation`. Update OpenMPOpt to work with kernels specified via calling convention as well as metadata. Update OpenMP tests to use the calling conventions. --- clang/test/OpenMP/assumes_include_nvptx.cpp | 4 +- .../nvptx_target_firstprivate_codegen.cpp | 2 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 16 +- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 61 +- .../Transforms/OpenMP/always_inline_device.ll | 17 +- .../attributor_module_slice_reproducer.ll | 6 +- .../test/Transforms/OpenMP/barrier_removal.ll | 146 +- llvm/test/Transforms/OpenMP/bug66687.ll | 11 +- .../OpenMP/custom_state_machines.ll | 81 +- .../OpenMP/custom_state_machines_pre_lto.ll | 109 +- .../OpenMP/custom_state_machines_remarks.ll | 7 +- .../Transforms/OpenMP/deduplication_target.ll | 4 +- .../get_hardware_num_threads_in_block_fold.ll | 13 +- ...dware_num_threads_in_block_fold_optnone.ll | 13 +- .../Transforms/OpenMP/global_constructor.ll | 13 +- .../OpenMP/globalization_remarks.ll | 4 +- .../OpenMP/gpu_kernel_detection_remarks.ll | 9 +- ..._state_machine_function_ptr_replacement.ll | 4 +- .../OpenMP/is_spmd_exec_mode_fold.ll | 17 +- .../Transforms/OpenMP/nested_parallelism.ll | 13 +- .../Transforms/OpenMP/parallel_level_fold.ll | 13 +- .../Transforms/OpenMP/remove_globalization.ll | 26 +- .../OpenMP/replace_globalization.ll | 21 +- .../OpenMP/single_threaded_execution.ll | 4 +- llvm/test/Transforms/OpenMP/spmdization.ll | 1607 +++-------------- .../Transforms/OpenMP/spmdization_assumes.ll | 27 +- .../OpenMP/spmdization_constant_prop.ll | 2 - .../Transforms/OpenMP/spmdization_guarding.ll | 106 +- ...mdization_guarding_two_reaching_kernels.ll | 36 +- .../Transforms/OpenMP/spmdization_indirect.ll | 161 +- ...zation_no_guarding_two_reaching_kernels.ll | 35 +- .../Transforms/OpenMP/spmdization_remarks.ll | 7 +- .../OpenMP/value-simplify-openmp-opt.ll | 96 +- .../Frontend/OpenMPIRBuilderTest.cpp | 20 - 34 files changed, 606 insertions(+), 2105 deletions(-) diff --git a/clang/test/OpenMP/assumes_include_nvptx.cpp b/clang/test/OpenMP/assumes_include_nvptx.cpp index 4577ea4c9c2b5e..c5040989a0e407 100644 --- a/clang/test/OpenMP/assumes_include_nvptx.cpp +++ b/clang/test/OpenMP/assumes_include_nvptx.cpp @@ -11,11 +11,11 @@ // TODO: Think about teaching the OMPIRBuilder about default attributes as well so the __kmpc* declarations are annotated. -// CHECK: define weak_odr protected void @__omp_offloading_{{.*}}__Z17complex_reductionIfEvv_{{.*}}({{.*}}) [[attr0:#[0-9]]] +// CHECK: define weak_odr protected ptx_kernel void @__omp_offloading_{{.*}}__Z17complex_reductionIfEvv_{{.*}}({{.*}}) [[attr0:#[0-9]]] // CHECK: call i32 @__kmpc_target_init( // CHECK: declare noundef float @_Z3sinf(float noundef) [[attr1:#[0-9]*]] // CHECK: declare void @__kmpc_target_deinit( -// CHECK: define weak_odr protected void @__omp_offloading_{{.*}}__Z17complex_reductionIdEvv_{{.*}}({{.*}}) [[attr0]] +// CHECK: define weak_odr protected ptx_kernel void @__omp_offloading_{{.*}}__Z17complex_reductionIdEvv_{{.*}}({{.*}}) [[attr0]] // CHECK: %call = call noundef double @_Z3sind(double noundef 0.000000e+00) [[attr2:#[0-9]]] // CHECK: declare noundef double @_Z3sind(double noundef) [[attr1]] diff --git a/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp b/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp index d573f1cd193d64..94ace20826db4d 100644 --- a/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp @@ -90,7 +90,7 @@ int foo(int n, double *ptr) { ptr[0]++; } - // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, ptr noundef [[PTR_IN:%.+]]) + // TCHECK: define weak_odr protected ptx_kernel void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, ptr noundef [[PTR_IN:%.+]]) // TCHECK: [[DYN_PTR_ADDR:%.+]] = alloca ptr, // TCHECK: [[PTR_ADDR:%.+]] = alloca ptr, // TCHECK-NOT: alloca ptr, diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 777391327f77c6..8cc3a99d92023d 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -6468,6 +6468,8 @@ void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes( OutlinedFn->setVisibility(GlobalValue::ProtectedVisibility); if (T.isAMDGCN()) OutlinedFn->setCallingConv(CallingConv::AMDGPU_KERNEL); + else if (T.isNVPTX()) + OutlinedFn->setCallingConv(CallingConv::PTX_Kernel); } } @@ -9223,20 +9225,8 @@ void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr, if (!Fn) return; - Module &M = *(Fn->getParent()); - LLVMContext &Ctx = M.getContext(); - - // Get "nvvm.annotations" metadata node. - NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); - - Metadata *MDVals[] = { - ConstantAsMetadata::get(Fn), MDString::get(Ctx, "kernel"), - ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Ctx), 1))}; - // Append metadata to nvvm.annotations. - MD->addOperand(MDNode::get(Ctx, MDVals)); - // Add a function attribute for the kernel. - Fn->addFnAttr(Attribute::get(Ctx, "kernel")); + Fn->addFnAttr("kernel"); if (T.isAMDGCN()) Fn->addFnAttr("uniform-work-group-size", "true"); Fn->addFnAttr(Attribute::MustProgress); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index e7221ee406a183..10008130016c3b 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -19,6 +19,7 @@ #include "llvm/Transforms/IPO/OpenMPOpt.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/EnumeratedArray.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" @@ -36,6 +37,7 @@ #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/Assumptions.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" @@ -5903,34 +5905,51 @@ bool llvm::omp::isOpenMPKernel(Function &Fn) { return Fn.hasFnAttribute("kernel"); } +static bool isKernelCC(Function &F) { + switch (F.getCallingConv()) { + default: + return false; + case CallingConv::PTX_Kernel: + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: + return true; + } +} + KernelSet llvm::omp::getDeviceKernels(Module &M) { // TODO: Create a more cross-platform way of determining device kernels. - NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations"); KernelSet Kernels; - if (!MD) - return Kernels; + DenseSet SeenKernels; + auto ProcessKernel = [&](Function &KF) { + if (SeenKernels.insert(&KF).second) { + // We are only interested in OpenMP target regions. Others, such as + // kernels generated by CUDA but linked together, are not interesting to + // this pass. + if (isOpenMPKernel(KF)) { + ++NumOpenMPTargetRegionKernels; + Kernels.insert(&KF); + } else + ++NumNonOpenMPTargetRegionKernels; + } + }; - for (auto *Op : MD->operands()) { - if (Op->getNumOperands() < 2) - continue; - MDString *KindID = dyn_cast(Op->getOperand(1)); - if (!KindID || KindID->getString() != "kernel") - continue; + if (NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations")) + for (auto *Op : MD->operands()) { + if (Op->getNumOperands() < 2) + continue; + MDString *KindID = dyn_cast(Op->getOperand(1)); + if (!KindID || KindID->getString() != "kernel") + continue; - Function *KernelFn = - mdconst::dyn_extract_or_null(Op->getOperand(0)); - if (!KernelFn) - continue; + if (auto *KernelFn = + mdconst::dyn_extract_or_null(Op->getOperand(0))) + ProcessKernel(*KernelFn); + } - // We are only interested in OpenMP target regions. Others, such as kernels - // generated by CUDA but linked together, are not interesting to this pass. - if (isOpenMPKernel(*KernelFn)) { - ++NumOpenMPTargetRegionKernels; - Kernels.insert(KernelFn); - } else - ++NumNonOpenMPTargetRegionKernels; - } + for (Function &F : M) + if (isKernelCC(F)) + ProcessKernel(F); return Kernels; } diff --git a/llvm/test/Transforms/OpenMP/always_inline_device.ll b/llvm/test/Transforms/OpenMP/always_inline_device.ll index 6028ff5278037b..9c5b19f7a6c88c 100644 --- a/llvm/test/Transforms/OpenMP/always_inline_device.ll +++ b/llvm/test/Transforms/OpenMP/always_inline_device.ll @@ -17,7 +17,7 @@ ; CHECK: @G = external global i8 ; CHECK: @kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ;. -define weak void @__omp_offloading_fd02_c0934fc2_foo_l4(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_fd02_c0934fc2_foo_l4(ptr %dyn) #0 { ; CHECK: Function Attrs: norecurse nounwind ; CHECK-LABEL: @__omp_offloading_fd02_c0934fc2_foo_l4( ; CHECK-NEXT: entry: @@ -79,12 +79,10 @@ attributes #1 = { convergent nounwind "frame-pointer"="all" "min-legal-vector-wi attributes #2 = { convergent } !omp_offload.info = !{!0} -!nvvm.annotations = !{!1} !llvm.module.flags = !{!2, !3, !4, !5, !6} !llvm.ident = !{!7} !0 = !{i32 0, i32 64770, i32 -1064087614, !"foo", i32 4, i32 0} -!1 = !{ptr @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1} !2 = !{i32 1, !"wchar_size", i32 4} !3 = !{i32 7, !"openmp", i32 50} !4 = !{i32 7, !"openmp-device", i32 50} @@ -97,11 +95,10 @@ attributes #2 = { convergent } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 -1064087614, !"foo", i32 4, i32 0} -; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1} -; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; CHECK: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK: [[META4:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; CHECK: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; CHECK: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} ;. diff --git a/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll b/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll index 9c0416af359d4d..3f4790ee15ac8d 100644 --- a/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll +++ b/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll @@ -13,10 +13,6 @@ define linkonce_odr hidden i8 @_ZStplIdESt7complexIT_ERKS2_S4_() local_unnamed_a ret i8 undef } -declare void @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148(i64, i64, i64, ptr, ptr, i64, ptr, ptr, ptr, i64) local_unnamed_addr +declare ptx_kernel void @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148(i64, i64, i64, ptr, ptr, i64, ptr, ptr, ptr, i64) local_unnamed_addr declare dso_local fastcc void @__kmpc_for_static_init_8u() unnamed_addr - -!nvvm.annotations = !{!0} - -!0 = !{ptr @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148, !"kernel", i32 1} diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll index 47a5d5104aa8bd..5b7544b1a79616 100644 --- a/llvm/test/Transforms/OpenMP/barrier_removal.ll +++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll @@ -28,7 +28,7 @@ declare void @llvm.assume(i1) ; CHECK: @G1 = global i32 42 ; CHECK: @G2 = addrspace(1) global i32 0 ;. -define void @pos_empty_1(i1 %c) "kernel" { +define amdgpu_kernel void @pos_empty_1(i1 %c) "kernel" { ; MODULE-LABEL: define {{[^@]+}}@pos_empty_1 ; MODULE-SAME: (i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] { ; MODULE-NEXT: ret void @@ -45,7 +45,7 @@ define void @pos_empty_1(i1 %c) "kernel" { call void @llvm.assume(i1 %c) ret void } -define void @pos_empty_2() "kernel" { +define amdgpu_kernel void @pos_empty_2() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_2 ; CHECK-SAME: () #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: ret void @@ -53,7 +53,7 @@ define void @pos_empty_2() "kernel" { call void @aligned_barrier() ret void } -define void @pos_empty_3() "kernel" { +define amdgpu_kernel void @pos_empty_3() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_3 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: ret void @@ -61,7 +61,7 @@ define void @pos_empty_3() "kernel" { call void @llvm.nvvm.barrier0() ret void } -define void @pos_empty_4() "kernel" { +define amdgpu_kernel void @pos_empty_4() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_4 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: ret void @@ -69,7 +69,7 @@ define void @pos_empty_4() "kernel" { call i32 @llvm.nvvm.barrier0.and(i32 0) ret void } -define void @pos_empty_5() "kernel" { +define amdgpu_kernel void @pos_empty_5() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_5 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: ret void @@ -77,7 +77,7 @@ define void @pos_empty_5() "kernel" { call i32 @llvm.nvvm.barrier0.or(i32 0) ret void } -define void @pos_empty_6() "kernel" { +define amdgpu_kernel void @pos_empty_6() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_6 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: ret void @@ -85,7 +85,7 @@ define void @pos_empty_6() "kernel" { call i32 @llvm.nvvm.barrier0.popc(i32 0) ret void } -define void @pos_empty_7a() "kernel" { +define amdgpu_kernel void @pos_empty_7a() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_7a ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: call void @unknown() @@ -96,7 +96,7 @@ define void @pos_empty_7a() "kernel" { ret void } ; FIXME: We should remove the barrier. -define void @pos_empty_7b() "kernel" { +define amdgpu_kernel void @pos_empty_7b() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_7b ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: call void @unknown() #[[ATTR5:[0-9]+]] @@ -109,7 +109,7 @@ define void @pos_empty_7b() "kernel" { call void @unknown() ret void } -define void @pos_empty_8(i1 %c) "kernel" { +define amdgpu_kernel void @pos_empty_8(i1 %c) "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_8 ; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -126,7 +126,7 @@ t: f: ret void } -define void @neg_empty_8() "kernel" { +define amdgpu_kernel void @neg_empty_8() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@neg_empty_8 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: call void @unknown() @@ -137,7 +137,7 @@ define void @neg_empty_8() "kernel" { call void @llvm.amdgcn.s.barrier() ret void } -define void @neg_empty_9(i1 %c) "kernel" { +define amdgpu_kernel void @neg_empty_9(i1 %c) "kernel" { ; CHECK-LABEL: define {{[^@]+}}@neg_empty_9 ; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -173,7 +173,7 @@ m: ret void } ; FIXME: We should remove the barrier -define void @pos_empty_10() "kernel" { +define amdgpu_kernel void @pos_empty_10() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_10 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: br label [[M:%.*]] @@ -186,7 +186,7 @@ m: call void @llvm.amdgcn.s.barrier() ret void } -define void @pos_empty_11() "kernel" { +define amdgpu_kernel void @pos_empty_11() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_empty_11 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: br label [[M:%.*]] @@ -206,7 +206,7 @@ define void @empty() { ret void } ; FIXME: We should remove the barrier in the end but not the first one. -define void @neg_empty_12(i1 %c) "kernel" { +define amdgpu_kernel void @neg_empty_12(i1 %c) "kernel" { ; MODULE-LABEL: define {{[^@]+}}@neg_empty_12 ; MODULE-SAME: (i1 [[C:%.*]]) #[[ATTR4]] { ; MODULE-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -266,7 +266,7 @@ define void @neg_empty_2() "kernel" { @GC1 = constant i32 42 @GC2 = addrspace(4) global i32 0 @GPtr4 = addrspace(4) global ptr addrspace(4) null -define void @pos_constant_loads() "kernel" { +define amdgpu_kernel void @pos_constant_loads() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_constant_loads ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(4), ptr addrspace(4) @GPtr4, align 8 @@ -296,7 +296,7 @@ define void @pos_constant_loads() "kernel" { @GS = addrspace(3) global i32 0 @GPtr = global ptr null ; TODO: We could remove some of the barriers due to the lack of write effects. -define void @neg_loads() "kernel" { +define amdgpu_kernel void @neg_loads() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@neg_loads ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8 @@ -327,7 +327,7 @@ define void @neg_loads() "kernel" { @PG1 = thread_local global i32 42 @PG2 = addrspace(5) global i32 0 @GPtr5 = global ptr addrspace(5) null -define void @pos_priv_mem() "kernel" { +define amdgpu_kernel void @pos_priv_mem() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_priv_mem ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(5), ptr @GPtr5, align 4 @@ -358,7 +358,7 @@ define void @pos_priv_mem() "kernel" { } @G1 = global i32 42 @G2 = addrspace(1) global i32 0 -define void @neg_mem() "kernel" { +define amdgpu_kernel void @neg_mem() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@neg_mem ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8 @@ -388,7 +388,7 @@ define void @neg_mem() "kernel" { ret void } -define void @pos_multiple() "kernel" { +define amdgpu_kernel void @pos_multiple() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_multiple ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: ret void @@ -404,7 +404,7 @@ define void @pos_multiple() "kernel" { ret void } -define void @multiple_blocks_kernel_1(i1 %c0, i1 %c1) "kernel" { +define amdgpu_kernel void @multiple_blocks_kernel_1(i1 %c0, i1 %c1) "kernel" { ; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_kernel_1 ; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]] @@ -461,7 +461,7 @@ m: ret void } -define void @multiple_blocks_kernel_2(i1 %c0, i1 %c1, ptr %p) "kernel" { +define amdgpu_kernel void @multiple_blocks_kernel_2(i1 %c0, i1 %c1, ptr %p) "kernel" { ; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_kernel_2 ; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: store i32 4, ptr [[P]], align 4 @@ -727,7 +727,7 @@ define internal void @barrier_then_write_then_barrier0(ptr %p) { call void @aligned_barrier() ret void } -define void @multiple_blocks_functions_kernel_effects_0(i1 %c0, i1 %c1, ptr %p) "kernel" { +define amdgpu_kernel void @multiple_blocks_functions_kernel_effects_0(i1 %c0, i1 %c1, ptr %p) "kernel" { ; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_kernel_effects_0 ; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] { ; MODULE-NEXT: call void @barrier_then_write_then_barrier0(ptr [[P]]) @@ -1040,7 +1040,7 @@ define internal void @callee_barrier() { call void @aligned_barrier() ret void } -define void @caller_barrier1() "kernel" { +define amdgpu_kernel void @caller_barrier1() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@caller_barrier1 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: call void @callee_barrier() @@ -1051,7 +1051,7 @@ define void @caller_barrier1() "kernel" { call void @aligned_barrier() ret void } -define void @caller_barrier2() "kernel" { +define amdgpu_kernel void @caller_barrier2() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@caller_barrier2 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: call void @unknown() @@ -1065,7 +1065,7 @@ define void @caller_barrier2() "kernel" { ret void } -define void @loop_barrier() "kernel" { +define amdgpu_kernel void @loop_barrier() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@loop_barrier ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -1095,7 +1095,7 @@ exit: ret void } -define void @loop_barrier_end_barriers() "kernel" { +define amdgpu_kernel void @loop_barrier_end_barriers() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -1129,7 +1129,7 @@ exit: ret void } -define void @loop_barrier_end_barriers_unknown() "kernel" { +define amdgpu_kernel void @loop_barrier_end_barriers_unknown() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_unknown ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -1165,7 +1165,7 @@ exit: ret void } -define void @loop_barrier_store() "kernel" { +define amdgpu_kernel void @loop_barrier_store() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@loop_barrier_store ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -1195,7 +1195,7 @@ exit: ret void } -define void @loop_barrier_end_barriers_store() "kernel" { +define amdgpu_kernel void @loop_barrier_end_barriers_store() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_store ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -1232,37 +1232,7 @@ exit: } !llvm.module.flags = !{!16,!15} -!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14,!17,!18,!19,!20,!21,!22,!23,!24,!25,!26,!27,!28,!29,!30} -!0 = !{ptr @pos_empty_1, !"kernel", i32 1} -!1 = !{ptr @pos_empty_2, !"kernel", i32 1} -!2 = !{ptr @pos_empty_3, !"kernel", i32 1} -!3 = !{ptr @pos_empty_4, !"kernel", i32 1} -!4 = !{ptr @pos_empty_5, !"kernel", i32 1} -!5 = !{ptr @pos_empty_6, !"kernel", i32 1} -!17 = !{ptr @pos_empty_7a, !"kernel", i32 1} -!18 = !{ptr @pos_empty_7b, !"kernel", i32 1} -!23 = !{ptr @pos_empty_8, !"kernel", i32 1} -!24 = !{ptr @caller_barrier1, !"kernel", i32 1} -!25 = !{ptr @caller_barrier2, !"kernel", i32 1} -!26 = !{ptr @loop_barrier, !"kernel", i32 1} -!27 = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1} -!28 = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1} -!29 = !{ptr @loop_barrier_store, !"kernel", i32 1} -!30 = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1} -!6 = !{ptr @neg_empty_8, !"kernel", i32 1} -!19 = !{ptr @neg_empty_9, !"kernel", i32 1} -!20 = !{ptr @pos_empty_10, !"kernel", i32 1} -!21 = !{ptr @pos_empty_11, !"kernel", i32 1} -!22 = !{ptr @neg_empty_12, !"kernel", i32 1} -!7 = !{ptr @pos_constant_loads, !"kernel", i32 1} -!8 = !{ptr @neg_loads, !"kernel", i32 1} -!9 = !{ptr @pos_priv_mem, !"kernel", i32 1} -!10 = !{ptr @neg_mem, !"kernel", i32 1} -!11 = !{ptr @pos_multiple, !"kernel", i32 1} -!12 = !{ptr @multiple_blocks_kernel_1, !"kernel", i32 1} -!13 = !{ptr @multiple_blocks_kernel_2, !"kernel", i32 1} -!14 = !{ptr @multiple_blocks_functions_kernel_effects_0, !"kernel", i32 1} !15 = !{i32 7, !"openmp", i32 50} !16 = !{i32 7, !"openmp-device", i32 50} ;. @@ -1282,65 +1252,7 @@ exit: ;. ; MODULE: [[META0:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} ; MODULE: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; MODULE: [[META2:![0-9]+]] = !{ptr @pos_empty_1, !"kernel", i32 1} -; MODULE: [[META3:![0-9]+]] = !{ptr @pos_empty_2, !"kernel", i32 1} -; MODULE: [[META4:![0-9]+]] = !{ptr @pos_empty_3, !"kernel", i32 1} -; MODULE: [[META5:![0-9]+]] = !{ptr @pos_empty_4, !"kernel", i32 1} -; MODULE: [[META6:![0-9]+]] = !{ptr @pos_empty_5, !"kernel", i32 1} -; MODULE: [[META7:![0-9]+]] = !{ptr @pos_empty_6, !"kernel", i32 1} -; MODULE: [[META8:![0-9]+]] = !{ptr @neg_empty_8, !"kernel", i32 1} -; MODULE: [[META9:![0-9]+]] = !{ptr @pos_constant_loads, !"kernel", i32 1} -; MODULE: [[META10:![0-9]+]] = !{ptr @neg_loads, !"kernel", i32 1} -; MODULE: [[META11:![0-9]+]] = !{ptr @pos_priv_mem, !"kernel", i32 1} -; MODULE: [[META12:![0-9]+]] = !{ptr @neg_mem, !"kernel", i32 1} -; MODULE: [[META13:![0-9]+]] = !{ptr @pos_multiple, !"kernel", i32 1} -; MODULE: [[META14:![0-9]+]] = !{ptr @multiple_blocks_kernel_1, !"kernel", i32 1} -; MODULE: [[META15:![0-9]+]] = !{ptr @multiple_blocks_kernel_2, !"kernel", i32 1} -; MODULE: [[META16:![0-9]+]] = !{ptr @multiple_blocks_functions_kernel_effects_0, !"kernel", i32 1} -; MODULE: [[META17:![0-9]+]] = !{ptr @pos_empty_7a, !"kernel", i32 1} -; MODULE: [[META18:![0-9]+]] = !{ptr @pos_empty_7b, !"kernel", i32 1} -; MODULE: [[META19:![0-9]+]] = !{ptr @neg_empty_9, !"kernel", i32 1} -; MODULE: [[META20:![0-9]+]] = !{ptr @pos_empty_10, !"kernel", i32 1} -; MODULE: [[META21:![0-9]+]] = !{ptr @pos_empty_11, !"kernel", i32 1} -; MODULE: [[META22:![0-9]+]] = !{ptr @neg_empty_12, !"kernel", i32 1} -; MODULE: [[META23:![0-9]+]] = !{ptr @pos_empty_8, !"kernel", i32 1} -; MODULE: [[META24:![0-9]+]] = !{ptr @caller_barrier1, !"kernel", i32 1} -; MODULE: [[META25:![0-9]+]] = !{ptr @caller_barrier2, !"kernel", i32 1} -; MODULE: [[META26:![0-9]+]] = !{ptr @loop_barrier, !"kernel", i32 1} -; MODULE: [[META27:![0-9]+]] = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1} -; MODULE: [[META28:![0-9]+]] = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1} -; MODULE: [[META29:![0-9]+]] = !{ptr @loop_barrier_store, !"kernel", i32 1} -; MODULE: [[META30:![0-9]+]] = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1} ;. ; CGSCC: [[META0:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} ; CGSCC: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; CGSCC: [[META2:![0-9]+]] = !{ptr @pos_empty_1, !"kernel", i32 1} -; CGSCC: [[META3:![0-9]+]] = !{ptr @pos_empty_2, !"kernel", i32 1} -; CGSCC: [[META4:![0-9]+]] = !{ptr @pos_empty_3, !"kernel", i32 1} -; CGSCC: [[META5:![0-9]+]] = !{ptr @pos_empty_4, !"kernel", i32 1} -; CGSCC: [[META6:![0-9]+]] = !{ptr @pos_empty_5, !"kernel", i32 1} -; CGSCC: [[META7:![0-9]+]] = !{ptr @pos_empty_6, !"kernel", i32 1} -; CGSCC: [[META8:![0-9]+]] = !{ptr @neg_empty_8, !"kernel", i32 1} -; CGSCC: [[META9:![0-9]+]] = !{ptr @pos_constant_loads, !"kernel", i32 1} -; CGSCC: [[META10:![0-9]+]] = !{ptr @neg_loads, !"kernel", i32 1} -; CGSCC: [[META11:![0-9]+]] = !{ptr @pos_priv_mem, !"kernel", i32 1} -; CGSCC: [[META12:![0-9]+]] = !{ptr @neg_mem, !"kernel", i32 1} -; CGSCC: [[META13:![0-9]+]] = !{ptr @pos_multiple, !"kernel", i32 1} -; CGSCC: [[META14:![0-9]+]] = !{ptr @multiple_blocks_kernel_1, !"kernel", i32 1} -; CGSCC: [[META15:![0-9]+]] = !{ptr @multiple_blocks_kernel_2, !"kernel", i32 1} -; CGSCC: [[META16:![0-9]+]] = !{ptr @multiple_blocks_functions_kernel_effects_0, !"kernel", i32 1} -; CGSCC: [[META17:![0-9]+]] = !{ptr @pos_empty_7a, !"kernel", i32 1} -; CGSCC: [[META18:![0-9]+]] = !{ptr @pos_empty_7b, !"kernel", i32 1} -; CGSCC: [[META19:![0-9]+]] = !{ptr @neg_empty_9, !"kernel", i32 1} -; CGSCC: [[META20:![0-9]+]] = !{ptr @pos_empty_10, !"kernel", i32 1} -; CGSCC: [[META21:![0-9]+]] = !{ptr @pos_empty_11, !"kernel", i32 1} -; CGSCC: [[META22:![0-9]+]] = !{ptr @neg_empty_12, !"kernel", i32 1} -; CGSCC: [[META23:![0-9]+]] = !{ptr @pos_empty_8, !"kernel", i32 1} -; CGSCC: [[META24:![0-9]+]] = !{ptr @caller_barrier1, !"kernel", i32 1} -; CGSCC: [[META25:![0-9]+]] = !{ptr @caller_barrier2, !"kernel", i32 1} -; CGSCC: [[META26:![0-9]+]] = !{ptr @loop_barrier, !"kernel", i32 1} -; CGSCC: [[META27:![0-9]+]] = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1} -; CGSCC: [[META28:![0-9]+]] = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1} -; CGSCC: [[META29:![0-9]+]] = !{ptr @loop_barrier_store, !"kernel", i32 1} -; CGSCC: [[META30:![0-9]+]] = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1} ;. diff --git a/llvm/test/Transforms/OpenMP/bug66687.ll b/llvm/test/Transforms/OpenMP/bug66687.ll index e0a9b825a88041..9bb069b1735bed 100644 --- a/llvm/test/Transforms/OpenMP/bug66687.ll +++ b/llvm/test/Transforms/OpenMP/bug66687.ll @@ -5,25 +5,22 @@ source_filename = "bug66687.ll" target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" -define weak void @openmp_kernel() "kernel" { -; CHECK-LABEL: define weak void @openmp_kernel( +define weak ptx_kernel void @openmp_kernel() "kernel" { +; CHECK-LABEL: define weak ptx_kernel void @openmp_kernel( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret void ; ret void } -define weak_odr void @non_openmp_kernel() { -; CHECK-LABEL: define weak_odr void @non_openmp_kernel() { +define weak_odr ptx_kernel void @non_openmp_kernel() { +; CHECK-LABEL: define weak_odr ptx_kernel void @non_openmp_kernel() { ; CHECK-NEXT: ret void ; ret void } !llvm.module.flags = !{!0, !1} -!nvvm.annotations = !{!2, !3} !0 = !{i32 7, !"openmp", i32 51} !1 = !{i32 7, !"openmp-device", i32 51} -!2 = !{ptr @openmp_kernel, !"kernel", i32 1} -!3 = !{ptr @non_openmp_kernel, !"kernel", i32 1} diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll index e6ddf16f067634..10e521bbfcc10f 100644 --- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll @@ -138,7 +138,7 @@ @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } -define weak void @__omp_offloading_14_a36502b_no_state_machine_needed_l14(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_needed_l14(ptr %dyn) #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -197,7 +197,7 @@ declare i32 @__kmpc_global_thread_num(ptr) #3 declare void @__kmpc_target_deinit() -define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_l22(ptr %dyn) #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -290,7 +290,7 @@ entry: ret void } -define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39(ptr %dyn) #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -367,7 +367,7 @@ entry: ret void } -define weak void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55(ptr %dyn) #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -453,7 +453,7 @@ entry: ret void } -define weak void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66(ptr %dyn) #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -537,7 +537,7 @@ entry: ret void } -define weak void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77(ptr %dyn) #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -624,7 +624,7 @@ entry: ret void } -define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92(ptr %dyn) #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -679,7 +679,7 @@ return: ; preds = %if.end, %if.then declare i32 @omp_get_thread_num(...) #4 -define weak void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112(ptr %dyn) #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -812,7 +812,6 @@ attributes #8 = { convergent "llvm.assume"="omp_no_openmp" } attributes #9 = { convergent nounwind readonly willreturn } !omp_offload.info = !{!0, !1, !2, !3, !4, !5, !6, !7} -!nvvm.annotations = !{!8, !9, !10, !11, !12, !13, !14, !15} !llvm.module.flags = !{!16, !17, !18} !0 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} @@ -823,14 +822,6 @@ attributes #9 = { convergent nounwind readonly willreturn } !5 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} !6 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} !7 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -!8 = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -!9 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -!10 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -!11 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -!12 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -!13 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -!14 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -!15 = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} !16 = !{i32 1, !"wchar_size", i32 4} !17 = !{i32 7, !"openmp", i32 50} !18 = !{i32 7, !"openmp-device", i32 50} @@ -4107,17 +4098,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} ; AMDGPU: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} ; AMDGPU: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; AMDGPU: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; AMDGPU: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; AMDGPU: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; AMDGPU: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; AMDGPU: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; AMDGPU: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; AMDGPU: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; AMDGPU: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; AMDGPU: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; AMDGPU: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; AMDGPU: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} ;. ; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} ; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} @@ -4127,17 +4110,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} ; NVPTX: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} ; NVPTX: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; NVPTX: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; NVPTX: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; NVPTX: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; NVPTX: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; NVPTX: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; NVPTX: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; NVPTX: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; NVPTX: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; NVPTX: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; NVPTX: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; NVPTX: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} ;. ; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} ; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} @@ -4147,17 +4122,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} ; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} ; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU-DISABLED: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU-DISABLED: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} ;. ; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} ; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} @@ -4167,15 +4134,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} ; NVPTX-DISABLED: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} ; NVPTX-DISABLED: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; NVPTX-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; NVPTX-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; NVPTX-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; NVPTX-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; NVPTX-DISABLED: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; NVPTX-DISABLED: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; NVPTX-DISABLED: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; NVPTX-DISABLED: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; NVPTX-DISABLED: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX-DISABLED: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX-DISABLED: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; NVPTX-DISABLED: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; NVPTX-DISABLED: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; NVPTX-DISABLED: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} ;. diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll index d20821d450365b..9576ff6ca6aeee 100644 --- a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll @@ -139,7 +139,7 @@ @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } -define weak void @__omp_offloading_14_a36502b_no_state_machine_needed_l14(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_needed_l14(ptr %dyn) #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -196,7 +196,7 @@ declare i32 @__kmpc_global_thread_num(ptr) #3 declare void @__kmpc_target_deinit() -define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_l22(ptr %dyn) #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -289,7 +289,7 @@ entry: ret void } -define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39(ptr %dyn) #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -366,7 +366,7 @@ entry: ret void } -define weak void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55(ptr %dyn) #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -452,7 +452,7 @@ entry: ret void } -define weak void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66(ptr %dyn) #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -536,7 +536,7 @@ entry: ret void } -define weak void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77(ptr %dyn) #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -623,7 +623,7 @@ entry: ret void } -define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92(ptr %dyn) #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -678,7 +678,7 @@ return: ; preds = %if.end, %if.then declare i32 @omp_get_thread_num(...) #4 -define weak void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112(ptr %dyn) #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -811,7 +811,6 @@ attributes #8 = { convergent "llvm.assume"="omp_no_openmp" } attributes #9 = { convergent nounwind readonly willreturn } !omp_offload.info = !{!0, !1, !2, !3, !4, !5, !6, !7} -!nvvm.annotations = !{!8, !9, !10, !11, !12, !13, !14, !15} !llvm.module.flags = !{!16, !17, !18} !0 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} @@ -822,14 +821,6 @@ attributes #9 = { convergent nounwind readonly willreturn } !5 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} !6 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} !7 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -!8 = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -!9 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -!10 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -!11 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -!12 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -!13 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -!14 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -!15 = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} !16 = !{i32 1, !"wchar_size", i32 4} !17 = !{i32 7, !"openmp", i32 50} !18 = !{i32 7, !"openmp-device", i32 50} @@ -4976,17 +4967,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} ; AMDGPU1: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} ; AMDGPU1: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; AMDGPU1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; AMDGPU1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; AMDGPU1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; AMDGPU1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; AMDGPU1: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; AMDGPU1: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; AMDGPU1: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; AMDGPU1: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; AMDGPU1: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU1: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU1: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; AMDGPU1: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; AMDGPU1: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; AMDGPU1: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} ;. ; NVPTX1: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} ; NVPTX1: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} @@ -4996,17 +4979,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX1: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} ; NVPTX1: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} ; NVPTX1: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; NVPTX1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; NVPTX1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; NVPTX1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; NVPTX1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; NVPTX1: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; NVPTX1: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; NVPTX1: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; NVPTX1: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; NVPTX1: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX1: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX1: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; NVPTX1: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; NVPTX1: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; NVPTX1: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} ;. ; AMDGPU2: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} ; AMDGPU2: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} @@ -5016,17 +4991,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} ; AMDGPU2: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} ; AMDGPU2: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; AMDGPU2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; AMDGPU2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; AMDGPU2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; AMDGPU2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; AMDGPU2: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; AMDGPU2: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; AMDGPU2: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; AMDGPU2: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; AMDGPU2: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU2: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU2: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; AMDGPU2: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; AMDGPU2: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; AMDGPU2: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} ;. ; AMDGPU3: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} ; AMDGPU3: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} @@ -5036,17 +5003,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} ; AMDGPU3: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} ; AMDGPU3: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; AMDGPU3: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; AMDGPU3: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; AMDGPU3: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; AMDGPU3: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; AMDGPU3: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; AMDGPU3: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; AMDGPU3: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; AMDGPU3: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; AMDGPU3: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU3: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU3: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; AMDGPU3: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; AMDGPU3: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; AMDGPU3: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} ;. ; NVPTX2: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} ; NVPTX2: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} @@ -5056,17 +5015,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX2: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} ; NVPTX2: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} ; NVPTX2: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; NVPTX2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; NVPTX2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; NVPTX2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; NVPTX2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; NVPTX2: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; NVPTX2: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; NVPTX2: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; NVPTX2: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; NVPTX2: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX2: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX2: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; NVPTX2: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; NVPTX2: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; NVPTX2: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} ;. ; NVPTX3: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} ; NVPTX3: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} @@ -5076,15 +5027,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX3: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} ; NVPTX3: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} ; NVPTX3: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; NVPTX3: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; NVPTX3: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; NVPTX3: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; NVPTX3: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; NVPTX3: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; NVPTX3: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; NVPTX3: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; NVPTX3: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; NVPTX3: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX3: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX3: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; NVPTX3: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; NVPTX3: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; NVPTX3: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} ;. diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll index f7bfd306506944..ad41639511e994 100644 --- a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll @@ -59,7 +59,7 @@ target triple = "nvptx64" ; Function Attrs: convergent norecurse nounwind -define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11(ptr %dyn) local_unnamed_addr #0 !dbg !15 { +define weak ptx_kernel void @__omp_offloading_2a_d80d3d_test_fallback_l11(ptr %dyn) local_unnamed_addr #0 !dbg !15 { entry: %captured_vars_addrs.i.i = alloca [0 x ptr], align 8 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment, ptr %dyn) #3, !dbg !18 @@ -104,7 +104,7 @@ declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3 declare void @__kmpc_target_deinit() local_unnamed_addr ; Function Attrs: norecurse nounwind -define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20(ptr %dyn) local_unnamed_addr #4 !dbg !32 { +define weak ptx_kernel void @__omp_offloading_2a_d80d3d_test_no_fallback_l20(ptr %dyn) local_unnamed_addr #4 !dbg !32 { entry: %captured_vars_addrs.i2.i = alloca [0 x ptr], align 8 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment, ptr %dyn) #3, !dbg !33 @@ -175,7 +175,6 @@ attributes #8 = { "llvm.assume"="omp_no_parallelism" } !llvm.dbg.cu = !{!0} !omp_offload.info = !{!3, !4} -!nvvm.annotations = !{!5, !6} !llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13} !llvm.ident = !{!14} @@ -184,8 +183,6 @@ attributes #8 = { "llvm.assume"="omp_no_parallelism" } !2 = !{} !3 = !{i32 0, i32 42, i32 14159165, !"test_no_fallback", i32 20, i32 1} !4 = !{i32 0, i32 42, i32 14159165, !"test_fallback", i32 11, i32 0} -!5 = !{ptr @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1} -!6 = !{ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1} !7 = !{i32 7, !"Dwarf Version", i32 2} !8 = !{i32 2, !"Debug Info Version", i32 3} !9 = !{i32 1, !"wchar_size", i32 4} diff --git a/llvm/test/Transforms/OpenMP/deduplication_target.ll b/llvm/test/Transforms/OpenMP/deduplication_target.ll index 6b0563365c6480..7027c3275b9329 100644 --- a/llvm/test/Transforms/OpenMP/deduplication_target.ll +++ b/llvm/test/Transforms/OpenMP/deduplication_target.ll @@ -15,7 +15,7 @@ target triple = "nvptx64" declare void @use(i32) -define weak void @__omp_offloading_50_a3e09bf8_foo_l2(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_50_a3e09bf8_foo_l2(ptr %dyn) #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_50_a3e09bf8_foo_l2 ; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -56,11 +56,9 @@ attributes #0 = { convergent noinline norecurse nounwind "kernel" "frame-pointer attributes #1 = { nounwind } !omp_offload.info = !{!0} -!nvvm.annotations = !{!1} !llvm.module.flags = !{!2, !3, !4} !0 = !{i32 0, i32 80, i32 -1545561096, !"foo", i32 2, i32 0} -!1 = !{ptr @__omp_offloading_50_a3e09bf8_foo_l2, !"kernel", i32 1} !2 = !{i32 1, !"wchar_size", i32 4} !3 = !{i32 7, !"openmp", i32 50} !4 = !{i32 7, !"openmp-device", i32 50} diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll index 6102201ad4bac7..6a4519a161fd6f 100644 --- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll +++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll @@ -19,7 +19,7 @@ target triple = "nvptx64" ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 ;. -define weak void @kernel0(ptr %dyn) "kernel" #0 { +define weak ptx_kernel void @kernel0(ptr %dyn) "kernel" #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel0 ; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel0_kernel_environment, ptr [[DYN]]) @@ -43,7 +43,7 @@ define weak void @kernel0(ptr %dyn) "kernel" #0 { ret void } -define weak void @kernel1(ptr %dyn) "kernel" #0 { +define weak ptx_kernel void @kernel1(ptr %dyn) "kernel" #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel1 ; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel1_kernel_environment, ptr [[DYN]]) @@ -63,7 +63,7 @@ define weak void @kernel1(ptr %dyn) "kernel" #0 { ret void } -define weak void @kernel2(ptr %dyn) "kernel" #0 { +define weak ptx_kernel void @kernel2(ptr %dyn) "kernel" #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel2 ; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -200,15 +200,11 @@ declare i32 @__kmpc_global_thread_num(ptr) !llvm.module.flags = !{!0, !1} -!nvvm.annotations = !{!2, !3, !4} attributes #0 = { "omp_target_thread_limit"="666" "omp_target_num_teams"="777"} !0 = !{i32 7, !"openmp", i32 50} !1 = !{i32 7, !"openmp-device", i32 50} -!2 = !{ptr @kernel0, !"kernel", i32 1} -!3 = !{ptr @kernel1, !"kernel", i32 1} -!4 = !{ptr @kernel2, !"kernel", i32 1} ;. ; CHECK: attributes #[[ATTR0]] = { "kernel" "omp_target_num_teams"="777" "omp_target_thread_limit"="666" } ; CHECK: attributes #[[ATTR1]] = { nounwind } @@ -217,7 +213,4 @@ attributes #0 = { "omp_target_thread_limit"="666" "omp_target_num_teams"="777"} ;. ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META2:![0-9]+]] = !{ptr @kernel0, !"kernel", i32 1} -; CHECK: [[META3:![0-9]+]] = !{ptr @kernel1, !"kernel", i32 1} -; CHECK: [[META4:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1} ;. diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll index 0cf6e7488b4dd4..3037d24b8c4484 100644 --- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll +++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll @@ -7,7 +7,7 @@ target triple = "nvptx64" ;. ; CHECK: @G = external global i32 ;. -define weak void @kernel0() #0 { +define weak ptx_kernel void @kernel0() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel0 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, ptr null) @@ -25,7 +25,7 @@ define weak void @kernel0() #0 { ret void } -define weak void @kernel1() #0 { +define weak ptx_kernel void @kernel1() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel1 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, ptr null) @@ -39,7 +39,7 @@ define weak void @kernel1() #0 { ret void } -define weak void @kernel2() #0 { +define weak ptx_kernel void @kernel2() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel2 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, ptr null) @@ -107,15 +107,11 @@ declare void @__kmpc_target_deinit() #1 !llvm.module.flags = !{!0, !1} -!nvvm.annotations = !{!2, !3, !4} attributes #0 = { optnone noinline "kernel" "omp_target_thread_limit"="666" "omp_target_num_teams"="777"} !0 = !{i32 7, !"openmp", i32 50} !1 = !{i32 7, !"openmp-device", i32 50} -!2 = !{ptr @kernel0, !"kernel", i32 1} -!3 = !{ptr @kernel1, !"kernel", i32 1} -!4 = !{ptr @kernel2, !"kernel", i32 1} ; ;. ; CHECK: attributes #[[ATTR0]] = { noinline optnone "kernel" "omp_target_num_teams"="777" "omp_target_thread_limit"="666" } @@ -123,7 +119,4 @@ attributes #0 = { optnone noinline "kernel" "omp_target_thread_limit"="666" "omp ;. ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META2:![0-9]+]] = !{ptr @kernel0, !"kernel", i32 1} -; CHECK: [[META3:![0-9]+]] = !{ptr @kernel1, !"kernel", i32 1} -; CHECK: [[META4:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1} ;. diff --git a/llvm/test/Transforms/OpenMP/global_constructor.ll b/llvm/test/Transforms/OpenMP/global_constructor.ll index 804b910dcd3088..1d18e527e14667 100644 --- a/llvm/test/Transforms/OpenMP/global_constructor.ll +++ b/llvm/test/Transforms/OpenMP/global_constructor.ll @@ -10,7 +10,7 @@ @_ZL6Device = internal global double 0.000000e+00, align 8 @__omp_offloading_fd02_85283c04_main_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } -define weak void @__omp_offloading_fd02_85283c04_main_l11(ptr %dyn, ptr nonnull align 8 dereferenceable(8) %X) local_unnamed_addr "kernel" { +define weak ptx_kernel void @__omp_offloading_fd02_85283c04_main_l11(ptr %dyn, ptr nonnull align 8 dereferenceable(8) %X) local_unnamed_addr "kernel" { entry: %0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_85283c04_main_l11_kernel_environment, ptr %dyn) #0 %exec_user_code = icmp eq i32 %0, -1 @@ -39,7 +39,7 @@ declare i32 @__kmpc_target_init(ptr, ptr) local_unnamed_addr declare void @__kmpc_target_deinit() local_unnamed_addr -define weak void @__omp_offloading__fd02_85283c04_Device_l6_ctor() "kernel" { +define weak ptx_kernel void @__omp_offloading__fd02_85283c04_Device_l6_ctor() "kernel" { entry: %call.i = tail call double @__nv_log(double noundef 2.000000e+00) #1 %call.i2 = tail call double @__nv_log(double noundef 2.000000e+00) #1 @@ -58,15 +58,12 @@ attributes #0 = { nounwind } attributes #1 = { convergent nounwind } !omp_offload.info = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4} !llvm.module.flags = !{!5, !6, !7, !8, !9} !llvm.ident = !{!10} !0 = !{i32 0, i32 64770, i32 -2060960764, !"__omp_offloading__fd02_85283c04_Device_l6_ctor", i32 6, i32 1} !1 = !{i32 0, i32 64770, i32 -2060960764, !"main", i32 11, i32 2} !2 = !{i32 1, !"_ZL6Device", i32 0, i32 0} -!3 = !{ptr @__omp_offloading__fd02_85283c04_Device_l6_ctor, !"kernel", i32 1} -!4 = !{ptr @__omp_offloading_fd02_85283c04_main_l11, !"kernel", i32 1} !5 = !{i32 1, !"wchar_size", i32 4} !6 = !{i32 7, !"openmp", i32 50} !7 = !{i32 7, !"openmp-device", i32 50} @@ -86,12 +83,12 @@ attributes #1 = { convergent nounwind } ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 8, !tbaa [[TBAA11:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 8, !tbaa [[TBAA9:![0-9]+]] ; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] ; CHECK: region.guarded: -; CHECK-NEXT: store double [[TMP1]], ptr [[X]], align 8, !tbaa [[TBAA11]] +; CHECK-NEXT: store double [[TMP1]], ptr [[X]], align 8, !tbaa [[TBAA9]] ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: ; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1:[0-9]+]], i32 [[TMP2]]) #[[ATTR1]] @@ -105,6 +102,6 @@ attributes #1 = { convergent nounwind } ; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR2]] ; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[CALL_I]], [[CALL_I2]] -; CHECK-NEXT: store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[TBAA11]] +; CHECK-NEXT: store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[TBAA9]] ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/OpenMP/globalization_remarks.ll b/llvm/test/Transforms/OpenMP/globalization_remarks.ll index 878ac9010a7dc3..0f37b3e070acd3 100644 --- a/llvm/test/Transforms/OpenMP/globalization_remarks.ll +++ b/llvm/test/Transforms/OpenMP/globalization_remarks.ll @@ -13,7 +13,7 @@ target triple = "nvptx64" @S = external local_unnamed_addr global ptr @foo_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null } -define void @foo() "kernel" { +define ptx_kernel void @foo() "kernel" { entry: %c = call i32 @__kmpc_target_init(ptr @foo_kernel_environment, ptr null) %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !10 @@ -39,7 +39,6 @@ declare void @__kmpc_target_deinit() !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5, !6} -!nvvm.annotations = !{!7, !8} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) !1 = !DIFile(filename: "globalization_remarks.c", directory: "/tmp/globalization_remarks.c") @@ -48,7 +47,6 @@ declare void @__kmpc_target_deinit() !4 = !{i32 1, !"wchar_size", i32 4} !5 = !{i32 7, !"openmp", i32 50} !6 = !{i32 7, !"openmp-device", i32 50} -!7 = !{ptr @foo, !"kernel", i32 1} !8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) !9 = !DISubroutineType(types: !2) !10 = !DILocation(line: 5, column: 7, scope: !8) diff --git a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll index b029efbbe3c687..ce17ffcbb20846 100644 --- a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll +++ b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll @@ -3,11 +3,11 @@ ; CHECK-DAG: remark: :0:0: OpenMP GPU kernel kernel1 ; CHECK-DAG: remark: :0:0: OpenMP GPU kernel kernel2 -define void @kernel1() "kernel" { +define ptx_kernel void @kernel1() "kernel" { ret void } -define void @kernel2() "kernel" { +define ptx_kernel void @kernel2() "kernel" { ret void } @@ -19,10 +19,5 @@ define void @non_kernel() { declare dso_local void @__kmpc_kernel_prepare_parallel(ptr) !llvm.module.flags = !{!4} -!nvvm.annotations = !{!2, !0, !1, !3, !1, !2} -!0 = !{ptr @kernel1, !"kernel", i32 1} -!1 = !{ptr @non_kernel, !"non_kernel", i32 1} -!2 = !{null, !"align", i32 1} -!3 = !{ptr @kernel2, !"kernel", i32 1} !4 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll index 936f7d1c46781a..760c5a354a37ce 100644 --- a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll +++ b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll @@ -44,7 +44,7 @@ @2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8 @__omp_offloading_10301_87b2c_foo_l7_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } -define weak void @__omp_offloading_10301_87b2c_foo_l7() "kernel" { +define weak ptx_kernel void @__omp_offloading_10301_87b2c_foo_l7() "kernel" { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -173,10 +173,8 @@ entry: } !omp_offload.info = !{!0} -!nvvm.annotations = !{!1} !llvm.module.flags = !{!2, !3} !0 = !{i32 0, i32 66305, i32 555956, !"foo", i32 7, i32 0} -!1 = !{ptr @__omp_offloading_10301_87b2c_foo_l7, !"kernel", i32 1} !2 = !{i32 7, !"openmp", i32 50} !3 = !{i32 7, !"openmp-device", i32 50} diff --git a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll index 310ac0a8296c33..2b3a7fabfb4595 100644 --- a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll +++ b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll @@ -18,7 +18,7 @@ target triple = "nvptx64" ; CHECK: @none_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null } ; CHECK: @will_not_be_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null } ;. -define weak void @is_spmd() "kernel" { +define weak ptx_kernel void @is_spmd() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@is_spmd ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @is_spmd_kernel_environment, ptr null) @@ -36,7 +36,7 @@ define weak void @is_spmd() "kernel" { ret void } -define weak void @will_be_spmd() "kernel" { +define weak ptx_kernel void @will_be_spmd() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@will_be_spmd ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -70,7 +70,7 @@ user_code.entry: ret void } -define weak void @non_spmd() "kernel" { +define weak ptx_kernel void @non_spmd() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@non_spmd ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment, ptr null) @@ -88,7 +88,7 @@ define weak void @non_spmd() "kernel" { ret void } -define weak void @will_not_be_spmd() "kernel" { +define weak ptx_kernel void @will_not_be_spmd() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@will_not_be_spmd ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @will_not_be_spmd_kernel_environment, ptr null) @@ -207,14 +207,9 @@ declare void @foo() declare void @bar() !llvm.module.flags = !{!0, !1} -!nvvm.annotations = !{!2, !3, !4, !5} !0 = !{i32 7, !"openmp", i32 50} !1 = !{i32 7, !"openmp-device", i32 50} -!2 = !{ptr @is_spmd, !"kernel", i32 1} -!3 = !{ptr @will_be_spmd, !"kernel", i32 1} -!4 = !{ptr @non_spmd, !"kernel", i32 1} -!5 = !{ptr @will_not_be_spmd, !"kernel", i32 1} ;. ; CHECK: attributes #[[ATTR0]] = { "kernel" } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { "llvm.assume"="ompx_spmd_amenable" } @@ -223,8 +218,4 @@ declare void @bar() ;. ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META2:![0-9]+]] = !{ptr @is_spmd, !"kernel", i32 1} -; CHECK: [[META3:![0-9]+]] = !{ptr @will_be_spmd, !"kernel", i32 1} -; CHECK: [[META4:![0-9]+]] = !{ptr @non_spmd, !"kernel", i32 1} -; CHECK: [[META5:![0-9]+]] = !{ptr @will_not_be_spmd, !"kernel", i32 1} ;. diff --git a/llvm/test/Transforms/OpenMP/nested_parallelism.ll b/llvm/test/Transforms/OpenMP/nested_parallelism.ll index 5c4386b24a3d5a..1679a27fdae8bc 100644 --- a/llvm/test/Transforms/OpenMP/nested_parallelism.ll +++ b/llvm/test/Transforms/OpenMP/nested_parallelism.ll @@ -43,7 +43,7 @@ target triple = "nvptx64" ; CHECK: @__omp_offloading_10302_bd7e0_main_l13_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ; CHECK: @__omp_offloading_10302_bd7e0_main_l16_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ;. -define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(ptr %dyn, i64 noundef %i) local_unnamed_addr "kernel" { +define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l13(ptr %dyn, i64 noundef %i) local_unnamed_addr "kernel" { ; CHECK-LABEL: @__omp_offloading_10302_bd7e0_main_l13( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS_I:%.*]] = alloca [1 x ptr], align 8 @@ -127,7 +127,7 @@ entry: declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr -define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l16(ptr %dyn, i64 noundef %i) local_unnamed_addr "kernel" { +define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l16(ptr %dyn, i64 noundef %i) local_unnamed_addr "kernel" { ; CHECK-LABEL: @__omp_offloading_10302_bd7e0_main_l16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS_I:%.*]] = alloca [1 x ptr], align 8 @@ -315,13 +315,10 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #11 !omp_offload.info = !{!0, !1} -!nvvm.annotations = !{!2, !3} !llvm.module.flags = !{!4, !5} !0 = !{i32 0, i32 66306, i32 776160, !"main", i32 13, i32 0, i32 0} !1 = !{i32 0, i32 66306, i32 776160, !"main", i32 16, i32 0, i32 1} -!2 = !{ptr @__omp_offloading_10302_bd7e0_main_l13, !"kernel", i32 1} -!3 = !{ptr @__omp_offloading_10302_bd7e0_main_l16, !"kernel", i32 1} !4 = !{i32 7, !"openmp", i32 50} !5 = !{i32 7, !"openmp-device", i32 50} @@ -336,8 +333,6 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #11 ;. ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 66306, i32 776160, !"main", i32 13, i32 0, i32 0} ; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 66306, i32 776160, !"main", i32 16, i32 0, i32 1} -; CHECK: [[META2:![0-9]+]] = !{ptr @__omp_offloading_10302_bd7e0_main_l13, !"kernel", i32 1} -; CHECK: [[META3:![0-9]+]] = !{ptr @__omp_offloading_10302_bd7e0_main_l16, !"kernel", i32 1} -; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} ;. diff --git a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll index fd6e7683af8e3e..e5f65b26ed223c 100644 --- a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll +++ b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll @@ -16,7 +16,7 @@ target triple = "nvptx64" ; CHECK: @spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null } ; CHECK: @parallel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null } ;. -define weak void @none_spmd() "kernel" { +define weak ptx_kernel void @none_spmd() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@none_spmd ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment, ptr null) @@ -32,7 +32,7 @@ define weak void @none_spmd() "kernel" { ret void } -define weak void @spmd() "kernel" { +define weak ptx_kernel void @spmd() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@spmd ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_kernel_environment, ptr null) @@ -48,7 +48,7 @@ define weak void @spmd() "kernel" { ret void } -define weak void @parallel() "kernel" { +define weak ptx_kernel void @parallel() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@parallel ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @parallel_kernel_environment, ptr null) @@ -136,20 +136,13 @@ declare i32 @__kmpc_target_init(ptr, ptr) #1 declare void @__kmpc_target_deinit() #1 !llvm.module.flags = !{!0, !1} -!nvvm.annotations = !{!2, !3, !4} !0 = !{i32 7, !"openmp", i32 50} !1 = !{i32 7, !"openmp-device", i32 50} -!2 = !{ptr @none_spmd, !"kernel", i32 1} -!3 = !{ptr @spmd, !"kernel", i32 1} -!4 = !{ptr @parallel, !"kernel", i32 1} ;. ; CHECK: attributes #[[ATTR0]] = { "kernel" } ; CHECK: attributes #[[ATTR1]] = { alwaysinline } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META2:![0-9]+]] = !{ptr @none_spmd, !"kernel", i32 1} -; CHECK: [[META3:![0-9]+]] = !{ptr @spmd, !"kernel", i32 1} -; CHECK: [[META4:![0-9]+]] = !{ptr @parallel, !"kernel", i32 1} ;. diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll index 31e3ef2b9079f6..29f2030c4d42b5 100644 --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -40,7 +40,7 @@ define weak i32 @__kmpc_target_init(ptr %0, ptr) { } declare void @__kmpc_target_deinit() -define void @kernel(ptr %dyn) "kernel" { +define ptx_kernel void @kernel(ptr %dyn) "kernel" { ; CHECK-LABEL: define {{[^@]+}}@kernel ; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -98,14 +98,14 @@ define internal void @bar() { ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4 -; CHECK-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR5:[0-9]+]], !dbg [[DBG8:![0-9]+]] +; CHECK-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR5:[0-9]+]], !dbg [[DBG7:![0-9]+]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@bar ; CHECK-DISABLED-SAME: () #[[ATTR1]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4 -; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR5:[0-9]+]], !dbg [[DBG8:![0-9]+]] +; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR5:[0-9]+]], !dbg [[DBG7:![0-9]+]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -146,7 +146,7 @@ define void @unused() { ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@unused() { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]], !dbg [[DBG11:![0-9]+]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]], !dbg [[DBG10:![0-9]+]] ; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR6]] ; CHECK-DISABLED-NEXT: ret void ; @@ -234,14 +234,12 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !6, !7} -!nvvm.annotations = !{!5} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) !1 = !DIFile(filename: "remove_globalization.c", directory: "/tmp/remove_globalization.c") !2 = !{} !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = !{i32 1, !"wchar_size", i32 4} -!5 = !{ptr @kernel, !"kernel", i32 1} !6 = !{i32 7, !"openmp", i32 50} !7 = !{i32 7, !"openmp-device", i32 50} !8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) @@ -276,10 +274,9 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META7:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1} -; CHECK: [[DBG8]] = !DILocation(line: 4, column: 2, scope: [[META9:![0-9]+]]) -; CHECK: [[META9]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META10:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]]) -; CHECK: [[META10]] = !DISubroutineType(types: [[META2]]) +; CHECK: [[DBG7]] = !DILocation(line: 4, column: 2, scope: [[META8:![0-9]+]]) +; CHECK: [[META8]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META9:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]]) +; CHECK: [[META9]] = !DISubroutineType(types: [[META2]]) ;. ; CHECK-DISABLED: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None) ; CHECK-DISABLED: [[META1]] = !DIFile(filename: "remove_globalization.c", directory: {{.*}}) @@ -288,11 +285,10 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; CHECK-DISABLED: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK-DISABLED: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK-DISABLED: [[META7:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1} -; CHECK-DISABLED: [[DBG8]] = !DILocation(line: 4, column: 2, scope: [[META9:![0-9]+]]) -; CHECK-DISABLED: [[META9]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META10:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]]) -; CHECK-DISABLED: [[META10]] = !DISubroutineType(types: [[META2]]) -; CHECK-DISABLED: [[DBG11]] = !DILocation(line: 6, column: 2, scope: [[META9]]) +; CHECK-DISABLED: [[DBG7]] = !DILocation(line: 4, column: 2, scope: [[META8:![0-9]+]]) +; CHECK-DISABLED: [[META8]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META9:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]]) +; CHECK-DISABLED: [[META9]] = !DISubroutineType(types: [[META2]]) +; CHECK-DISABLED: [[DBG10]] = !DILocation(line: 6, column: 2, scope: [[META8]]) ;. ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK-REMARKS: {{.*}} diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll index 6e4fb9e57388ba..92cfd750492269 100644 --- a/llvm/test/Transforms/OpenMP/replace_globalization.ll +++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -25,7 +25,7 @@ target triple = "nvptx64" @baz_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } -define dso_local void @foo(ptr %dyn) "kernel" { +define dso_local ptx_kernel void @foo(ptr %dyn) "kernel" { entry: %c = call i32 @__kmpc_target_init(ptr @foo_kernel_environment, ptr %dyn) %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) @@ -36,7 +36,7 @@ entry: ret void } -define void @bar(ptr %dyn) "kernel" { +define ptx_kernel void @bar(ptr %dyn) "kernel" { %c = call i32 @__kmpc_target_init(ptr @bar_kernel_environment, ptr %dyn) call void @unknown_no_openmp() %cmp = icmp eq i32 %c, -1 @@ -60,7 +60,7 @@ exit: ret void } -define void @baz_spmd(ptr %dyn) "kernel" { +define ptx_kernel void @baz_spmd(ptr %dyn) "kernel" { %c = call i32 @__kmpc_target_init(ptr @baz_kernel_environment, ptr %dyn) call void @unknown_no_openmp() %c0 = icmp eq i32 %c, -1 @@ -109,7 +109,6 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5, !6} -!nvvm.annotations = !{!7, !8, !13} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) !1 = !DIFile(filename: "replace_globalization.c", directory: "/tmp/replace_globalization.c") @@ -118,9 +117,6 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" !4 = !{i32 1, !"wchar_size", i32 4} !5 = !{i32 7, !"openmp", i32 50} !6 = !{i32 7, !"openmp-device", i32 50} -!7 = !{ptr @foo, !"kernel", i32 1} -!8 = !{ptr @bar, !"kernel", i32 1} -!13 = !{ptr @baz_spmd, !"kernel", i32 1} !9 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) !10 = !DISubroutineType(types: !2) !11 = !DILocation(line: 5, column: 7, scope: !9) @@ -177,7 +173,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]] ; CHECK: master3: -; CHECK-NEXT: [[Z:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG10:![0-9]+]] +; CHECK-NEXT: [[Z:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG7:![0-9]+]] ; CHECK-NEXT: call void @use.internalized(ptr nofree [[Z]]) #[[ATTR7]] ; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[Z]], i64 24) #[[ATTR8]] ; CHECK-NEXT: br label [[EXIT]] @@ -231,12 +227,9 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META7:![0-9]+]] = !{ptr @foo, !"kernel", i32 1} -; CHECK: [[META8:![0-9]+]] = !{ptr @bar, !"kernel", i32 1} -; CHECK: [[META9:![0-9]+]] = !{ptr @baz_spmd, !"kernel", i32 1} -; CHECK: [[DBG10]] = !DILocation(line: 5, column: 14, scope: [[META11:![0-9]+]]) -; CHECK: [[META11]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META12:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]]) -; CHECK: [[META12]] = !DISubroutineType(types: [[META2]]) +; CHECK: [[DBG7]] = !DILocation(line: 5, column: 14, scope: [[META8:![0-9]+]]) +; CHECK: [[META8]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META9:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]]) +; CHECK: [[META9]] = !DISubroutineType(types: [[META2]]) ;. ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK-LIMIT: {{.*}} diff --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll index c186e5f04f092d..70b9ce41c1a430 100644 --- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll +++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll @@ -16,7 +16,7 @@ ; CHECK: [openmp-opt] Basic block @kernel if.then is executed by a single thread. ; CHECK-NOT: [openmp-opt] Basic block @kernel if.else is executed by a single thread. ; CHECK-NOT: [openmp-opt] Basic block @kernel if.end is executed by a single thread. -define void @kernel(ptr %dyn) "kernel" { +define ptx_kernel void @kernel(ptr %dyn) "kernel" { %call = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr %dyn) %cmp = icmp eq i32 %call, -1 br i1 %cmp, label %if.then, label %if.else @@ -116,7 +116,6 @@ attributes #0 = { cold noinline } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5, !6} -!nvvm.annotations = !{!7} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) !1 = !DIFile(filename: "single_threaded_execution.c", directory: "/tmp/single_threaded_execution.c") @@ -125,7 +124,6 @@ attributes #0 = { cold noinline } !4 = !{i32 1, !"wchar_size", i32 4} !5 = !{i32 7, !"openmp", i32 50} !6 = !{i32 7, !"openmp-device", i32 50} -!7 = !{ptr @kernel, !"kernel", i32 1} !8 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 8, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) !9 = distinct !DISubprogram(name: "cold", scope: !1, file: !1, line: 8, type: !10, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) !10 = !DISubroutineType(types: !2) diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll index 6ff4b96b57556c..983175382f0f0f 100644 --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -105,36 +105,6 @@ @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } -; AMDGPU-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 -; AMDGPU-DISABLED: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 -; AMDGPU-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU-DISABLED: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU-DISABLED: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 -; NVPTX-DISABLED: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 -; NVPTX-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX-DISABLED: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX-DISABLED: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ;. ; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 @@ -226,7 +196,7 @@ ; NVPTX-DISABLED2: @__omp_outlined__7_wrapper.ID = private constant i8 undef ; NVPTX-DISABLED2: @__omp_outlined__9_wrapper.ID = private constant i8 undef ;. -define weak void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 { +define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 ; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] { ; AMDGPU-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() @@ -256,15 +226,6 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 { ; NVPTX-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] { ; NVPTX-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-DISABLED2-NEXT: ret void -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 -; AMDGPU-DISABLED-SAME: () #[[ATTR0:[0-9]+]] { -; AMDGPU-DISABLED-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() -; AMDGPU-DISABLED-NEXT: ret void -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 -; NVPTX-DISABLED-SAME: () #[[ATTR0:[0-9]+]] { -; NVPTX-DISABLED-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() -; NVPTX-DISABLED-NEXT: ret void call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ret void } @@ -282,7 +243,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] ; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-NEXT: br label [[COMMON_RET]] @@ -299,7 +260,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] ; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-NEXT: call void @__kmpc_target_deinit() ; NVPTX-NEXT: br label [[COMMON_RET]] @@ -350,7 +311,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-DISABLED1-NEXT: ret void ; AMDGPU-DISABLED1: user_code.entry: ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] +; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] @@ -401,7 +362,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-DISABLED2-NEXT: ret void ; AMDGPU-DISABLED2: user_code.entry: ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] +; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] @@ -451,7 +412,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-DISABLED1-NEXT: ret void ; NVPTX-DISABLED1: user_code.entry: ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] +; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] @@ -501,114 +462,10 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-DISABLED2-NEXT: ret void ; NVPTX-DISABLED2: user_code.entry: ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] +; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug -; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment) -; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED: is_worker_check: -; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED: worker_state_machine.begin: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED: worker_state_machine.finished: -; AMDGPU-DISABLED-NEXT: ret void -; AMDGPU-DISABLED: worker_state_machine.is_active.check: -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__1_wrapper.ID -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED: worker_state_machine.done.barrier: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED: thread.user_code.check: -; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED: common.ret: -; AMDGPU-DISABLED-NEXT: ret void -; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug -; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment) -; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED: is_worker_check: -; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED: worker_state_machine.begin: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED: worker_state_machine.finished: -; NVPTX-DISABLED-NEXT: ret void -; NVPTX-DISABLED: worker_state_machine.is_active.check: -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__1_wrapper.ID -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED: worker_state_machine.done.barrier: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED: thread.user_code.check: -; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED: common.ret: -; NVPTX-DISABLED-NEXT: ret void -; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -643,10 +500,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %. ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__ ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { @@ -661,10 +518,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %. ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__ ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { @@ -679,10 +536,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %. ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; AMDGPU-DISABLED1-NEXT: ret void ; AMDGPU-DISABLED1: for.body: -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__ ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { @@ -697,10 +554,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %. ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; AMDGPU-DISABLED2-NEXT: ret void ; AMDGPU-DISABLED2: for.body: -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__ ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { @@ -715,10 +572,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %. ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; NVPTX-DISABLED1-NEXT: ret void ; NVPTX-DISABLED1: for.body: -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__ ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { @@ -733,45 +590,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %. ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; NVPTX-DISABLED2-NEXT: ret void ; NVPTX-DISABLED2: for.body: -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED: for.cond: -; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED: for.cond.cleanup: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] -; AMDGPU-DISABLED-NEXT: ret void -; AMDGPU-DISABLED: for.body: -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED: for.cond: -; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED: for.cond.cleanup: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] -; NVPTX-DISABLED-NEXT: ret void -; NVPTX-DISABLED: for.body: -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] entry: %captured_vars_addrs = alloca [0 x ptr], align 8 br label %for.cond @@ -829,17 +651,6 @@ define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED2-NEXT: entry: ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] ; NVPTX-DISABLED2-NEXT: ret void -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] -; AMDGPU-DISABLED-NEXT: ret void -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] -; NVPTX-DISABLED-NEXT: ret void entry: call void @unknown() #11 ret void @@ -906,25 +717,6 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: ret void -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: ret void -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: ret void entry: %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 @@ -937,7 +729,7 @@ entry: } ; Function Attrs: alwaysinline convergent norecurse nounwind -define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20() #0 { +define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20() #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: @@ -950,7 +742,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-NEXT: br label [[COMMON_RET]] @@ -967,7 +759,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-NEXT: call void @__kmpc_target_deinit() ; NVPTX-NEXT: br label [[COMMON_RET]] @@ -1018,7 +810,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; AMDGPU-DISABLED1-NEXT: ret void ; AMDGPU-DISABLED1: user_code.entry: ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] @@ -1069,7 +861,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; AMDGPU-DISABLED2-NEXT: ret void ; AMDGPU-DISABLED2: user_code.entry: ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] @@ -1119,7 +911,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; NVPTX-DISABLED1-NEXT: ret void ; NVPTX-DISABLED1: user_code.entry: ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] @@ -1169,114 +961,10 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; NVPTX-DISABLED2-NEXT: ret void ; NVPTX-DISABLED2: user_code.entry: ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 -; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment) -; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED: is_worker_check: -; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED: worker_state_machine.begin: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED: worker_state_machine.finished: -; AMDGPU-DISABLED-NEXT: ret void -; AMDGPU-DISABLED: worker_state_machine.is_active.check: -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__3_wrapper.ID -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED: worker_state_machine.done.barrier: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED: thread.user_code.check: -; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED: common.ret: -; AMDGPU-DISABLED-NEXT: ret void -; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 -; NVPTX-DISABLED-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment) -; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED: is_worker_check: -; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED: worker_state_machine.begin: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED: worker_state_machine.finished: -; NVPTX-DISABLED-NEXT: ret void -; NVPTX-DISABLED: worker_state_machine.is_active.check: -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__3_wrapper.ID -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED: worker_state_machine.done.barrier: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED: thread.user_code.check: -; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED: common.ret: -; NVPTX-DISABLED-NEXT: ret void -; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -1314,10 +1002,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { @@ -1334,10 +1022,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] ; ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { @@ -1355,10 +1043,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED1-NEXT: ret void ; AMDGPU-DISABLED1: for.body: -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] ; ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { @@ -1376,10 +1064,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED2-NEXT: ret void ; AMDGPU-DISABLED2: for.body: -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] ; ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { @@ -1396,10 +1084,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED1-NEXT: ret void ; NVPTX-DISABLED1: for.body: -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] ; ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { @@ -1416,50 +1104,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED2-NEXT: ret void ; NVPTX-DISABLED2: for.body: -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr -; AMDGPU-DISABLED-NEXT: call void @use(ptr nocapture [[MALLOC_CAST]]) #[[ATTR7]] -; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED: for.cond: -; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED: for.cond.cleanup: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] -; AMDGPU-DISABLED-NEXT: ret void -; AMDGPU-DISABLED: for.body: -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-DISABLED-NEXT: call void @use(ptr nocapture [[X_H2S]]) #[[ATTR7]] -; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED: for.cond: -; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED: for.cond.cleanup: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] -; NVPTX-DISABLED-NEXT: ret void -; NVPTX-DISABLED: for.body: -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] entry: %captured_vars_addrs = alloca [0 x ptr], align 8 %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) @@ -1519,17 +1167,6 @@ define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED2-NEXT: entry: ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED2-NEXT: ret void -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]] -; AMDGPU-DISABLED-NEXT: ret void -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]] -; NVPTX-DISABLED-NEXT: ret void entry: call void @unknown() #11 ret void @@ -1596,25 +1233,6 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: ret void -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: ret void -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: ret void entry: %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 @@ -1628,7 +1246,7 @@ entry: ; Function Attrs: alwaysinline convergent norecurse nounwind -define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35() #0 { +define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35() #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: @@ -1641,7 +1259,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-NEXT: br label [[COMMON_RET]] @@ -1658,7 +1276,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-NEXT: call void @__kmpc_target_deinit() ; NVPTX-NEXT: br label [[COMMON_RET]] @@ -1709,7 +1327,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; AMDGPU-DISABLED1-NEXT: ret void ; AMDGPU-DISABLED1: user_code.entry: ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] @@ -1760,7 +1378,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; AMDGPU-DISABLED2-NEXT: ret void ; AMDGPU-DISABLED2: user_code.entry: ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] @@ -1810,7 +1428,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; NVPTX-DISABLED1-NEXT: ret void ; NVPTX-DISABLED1: user_code.entry: ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] @@ -1860,114 +1478,10 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; NVPTX-DISABLED2-NEXT: ret void ; NVPTX-DISABLED2: user_code.entry: ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 -; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment) -; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED: is_worker_check: -; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED: worker_state_machine.begin: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED: worker_state_machine.finished: -; AMDGPU-DISABLED-NEXT: ret void -; AMDGPU-DISABLED: worker_state_machine.is_active.check: -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__5_wrapper.ID -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED: worker_state_machine.done.barrier: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED: thread.user_code.check: -; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED: common.ret: -; AMDGPU-DISABLED-NEXT: ret void -; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 -; NVPTX-DISABLED-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment) -; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED: is_worker_check: -; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED: worker_state_machine.begin: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED: worker_state_machine.finished: -; NVPTX-DISABLED-NEXT: ret void -; NVPTX-DISABLED: worker_state_machine.is_active.check: -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__5_wrapper.ID -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED: worker_state_machine.done.barrier: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED: thread.user_code.check: -; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED: common.ret: -; NVPTX-DISABLED-NEXT: ret void -; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -2002,11 +1516,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: -; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { @@ -2021,11 +1535,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: -; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { @@ -2040,11 +1554,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED1-NEXT: ret void ; AMDGPU-DISABLED1: for.body: -; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] +; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { @@ -2059,11 +1573,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED2-NEXT: ret void ; AMDGPU-DISABLED2: for.body: -; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] +; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { @@ -2078,11 +1592,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED1-NEXT: ret void ; NVPTX-DISABLED1: for.body: -; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] +; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { @@ -2097,48 +1611,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED2-NEXT: ret void ; NVPTX-DISABLED2: for.body: -; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] +; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED: for.cond: -; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED: for.cond.cleanup: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] -; AMDGPU-DISABLED-NEXT: ret void -; AMDGPU-DISABLED: for.body: -; AMDGPU-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED: for.cond: -; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED: for.cond.cleanup: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] -; NVPTX-DISABLED-NEXT: ret void -; NVPTX-DISABLED: for.body: -; NVPTX-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] entry: %captured_vars_addrs = alloca [1 x ptr], align 8 %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) @@ -2167,73 +1644,56 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { ; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-DISABLED1-NEXT: ret void ; ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { ; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-DISABLED2-NEXT: ret void ; ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { ; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED1-NEXT: ret void ; ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { ; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED2-NEXT: ret void -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]] -; AMDGPU-DISABLED-NEXT: ret void -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]] -; NVPTX-DISABLED-NEXT: ret void entry: %0 = load i32, ptr %x, align 4, !tbaa !18 %inc = add nsw i32 %0, 1 @@ -2252,7 +1712,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] ; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; AMDGPU-NEXT: ret void ; @@ -2264,7 +1724,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] ; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; NVPTX-NEXT: ret void ; @@ -2276,7 +1736,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: ret void ; @@ -2288,7 +1748,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: ret void ; @@ -2300,7 +1760,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: ret void ; @@ -2312,32 +1772,9 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: ret void -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: ret void -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: ret void entry: %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 @@ -2352,7 +1789,7 @@ entry: } ; Function Attrs: alwaysinline convergent norecurse nounwind -define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50() #0 { +define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50() #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: @@ -2365,7 +1802,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-NEXT: br label [[COMMON_RET]] @@ -2382,7 +1819,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-NEXT: call void @__kmpc_target_deinit() ; NVPTX-NEXT: br label [[COMMON_RET]] @@ -2433,7 +1870,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; AMDGPU-DISABLED1-NEXT: ret void ; AMDGPU-DISABLED1: user_code.entry: ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] @@ -2484,7 +1921,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; AMDGPU-DISABLED2-NEXT: ret void ; AMDGPU-DISABLED2: user_code.entry: ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] @@ -2534,7 +1971,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; NVPTX-DISABLED1-NEXT: ret void ; NVPTX-DISABLED1: user_code.entry: ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] @@ -2584,114 +2021,10 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; NVPTX-DISABLED2-NEXT: ret void ; NVPTX-DISABLED2: user_code.entry: ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 -; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment) -; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED: is_worker_check: -; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED: worker_state_machine.begin: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED: worker_state_machine.finished: -; AMDGPU-DISABLED-NEXT: ret void -; AMDGPU-DISABLED: worker_state_machine.is_active.check: -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__7_wrapper.ID -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED: worker_state_machine.done.barrier: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED: thread.user_code.check: -; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED: common.ret: -; AMDGPU-DISABLED-NEXT: ret void -; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 -; NVPTX-DISABLED-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment) -; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED: is_worker_check: -; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED: worker_state_machine.begin: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED: worker_state_machine.finished: -; NVPTX-DISABLED-NEXT: ret void -; NVPTX-DISABLED: worker_state_machine.is_active.check: -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__7_wrapper.ID -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED: worker_state_machine.done.barrier: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED: thread.user_code.check: -; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED: common.ret: -; NVPTX-DISABLED-NEXT: ret void -; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -2723,7 +2056,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 ; AMDGPU-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] ; AMDGPU: region.guarded: -; AMDGPU-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: br label [[REGION_GUARDED_END:%.*]] ; AMDGPU: region.guarded.end: ; AMDGPU-NEXT: br label [[REGION_BARRIER]] @@ -2740,11 +2073,11 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: -; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] -; AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]] +; AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { @@ -2756,7 +2089,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 ; NVPTX-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] ; NVPTX: region.guarded: -; NVPTX-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: br label [[REGION_GUARDED_END:%.*]] ; NVPTX: region.guarded.end: ; NVPTX-NEXT: br label [[REGION_BARRIER]] @@ -2773,17 +2106,17 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: -; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] -; NVPTX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]] +; NVPTX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { ; AMDGPU-DISABLED1-NEXT: entry: ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -; AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU-DISABLED1: for.cond: ; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -2793,17 +2126,17 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED1-NEXT: ret void ; AMDGPU-DISABLED1: for.body: -; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]] +; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { ; AMDGPU-DISABLED2-NEXT: entry: ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -; AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU-DISABLED2: for.cond: ; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -2813,17 +2146,17 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED2-NEXT: ret void ; AMDGPU-DISABLED2: for.body: -; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]] +; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { ; NVPTX-DISABLED1-NEXT: entry: ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -; NVPTX-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]] ; NVPTX-DISABLED1: for.cond: ; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -2833,17 +2166,17 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED1-NEXT: ret void ; NVPTX-DISABLED1: for.body: -; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]] +; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { ; NVPTX-DISABLED2-NEXT: entry: ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -; NVPTX-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]] ; NVPTX-DISABLED2: for.cond: ; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -2853,50 +2186,11 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED2-NEXT: ret void ; NVPTX-DISABLED2: for.body: -; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]] +; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -; AMDGPU-DISABLED-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED: for.cond: -; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED: for.cond.cleanup: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] -; AMDGPU-DISABLED-NEXT: ret void -; AMDGPU-DISABLED: for.body: -; AMDGPU-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -; NVPTX-DISABLED-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED: for.cond: -; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED: for.cond.cleanup: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] -; NVPTX-DISABLED-NEXT: ret void -; NVPTX-DISABLED: for.body: -; NVPTX-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] entry: %captured_vars_addrs = alloca [1 x ptr], align 8 %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) @@ -2926,73 +2220,56 @@ define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { ; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] ; AMDGPU-DISABLED1-NEXT: ret void ; ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { ; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] ; AMDGPU-DISABLED2-NEXT: ret void ; ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { ; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] ; NVPTX-DISABLED1-NEXT: ret void ; ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { ; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] ; NVPTX-DISABLED2-NEXT: ret void -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] -; AMDGPU-DISABLED-NEXT: ret void -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] -; NVPTX-DISABLED-NEXT: ret void entry: %0 = load i32, ptr %x, align 4, !tbaa !18 %inc = add nsw i32 %0, 1 @@ -3011,7 +2288,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] ; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; AMDGPU-NEXT: ret void ; @@ -3023,7 +2300,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] ; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; NVPTX-NEXT: ret void ; @@ -3035,7 +2312,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: ret void ; @@ -3047,7 +2324,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: ret void ; @@ -3059,7 +2336,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: ret void ; @@ -3071,32 +2348,9 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: ret void -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: ret void -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: ret void entry: %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 @@ -3111,7 +2365,7 @@ entry: } ; Function Attrs: alwaysinline convergent norecurse nounwind -define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 { +define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: @@ -3384,96 +2638,6 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 -; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment) -; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED: is_worker_check: -; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED: worker_state_machine.begin: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED: worker_state_machine.finished: -; AMDGPU-DISABLED-NEXT: ret void -; AMDGPU-DISABLED: worker_state_machine.is_active.check: -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED: worker_state_machine.done.barrier: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED: thread.user_code.check: -; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED: common.ret: -; AMDGPU-DISABLED-NEXT: ret void -; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 -; NVPTX-DISABLED-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment) -; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED: is_worker_check: -; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED: worker_state_machine.begin: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED: worker_state_machine.finished: -; NVPTX-DISABLED-NEXT: ret void -; NVPTX-DISABLED: worker_state_machine.is_active.check: -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED: worker_state_machine.done.barrier: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED: thread.user_code.check: -; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED: common.ret: -; NVPTX-DISABLED-NEXT: ret void -; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -3530,24 +2694,13 @@ define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED2-NEXT: entry: ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED2-NEXT: ret void -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]] -; AMDGPU-DISABLED-NEXT: ret void -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]] -; NVPTX-DISABLED-NEXT: ret void entry: call void @unknown() #11 ret void } ; Function Attrs: alwaysinline convergent norecurse nounwind -define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { +define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: @@ -3862,110 +3015,6 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 -; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment) -; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED: is_worker_check: -; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED: worker_state_machine.begin: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED: worker_state_machine.finished: -; AMDGPU-DISABLED-NEXT: ret void -; AMDGPU-DISABLED: worker_state_machine.is_active.check: -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED: worker_state_machine.done.barrier: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED: thread.user_code.check: -; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED: common.ret: -; AMDGPU-DISABLED-NEXT: ret void -; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 -; NVPTX-DISABLED-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment) -; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED: is_worker_check: -; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED: worker_state_machine.begin: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED: worker_state_machine.finished: -; NVPTX-DISABLED-NEXT: ret void -; NVPTX-DISABLED: worker_state_machine.is_active.check: -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED: worker_state_machine.done.barrier: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED: thread.user_code.check: -; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED: common.ret: -; NVPTX-DISABLED-NEXT: ret void -; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] -; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] entry: %captured_vars_addrs = alloca [0 x ptr], align 8 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) @@ -4021,17 +3070,6 @@ define internal void @.omp_outlined.(i32 %.global_tid., ptr noalias %.part_id., ; NVPTX-DISABLED2-NEXT: entry: ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED2-NEXT: ret void -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined. -; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] -; AMDGPU-DISABLED-NEXT: ret void -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined. -; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] -; NVPTX-DISABLED-NEXT: ret void entry: call void @spmd_amenable() #10 ret void @@ -4093,13 +3131,6 @@ define weak i32 @__kmpc_target_init(ptr, ptr) { ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init ; NVPTX-DISABLED2-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { ; NVPTX-DISABLED2-NEXT: ret i32 0 -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init -; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]]) { -; AMDGPU-DISABLED-NEXT: ret i32 0 -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init -; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]]) { -; NVPTX-DISABLED-NEXT: ret i32 0 ret i32 0 } @@ -4158,16 +3189,6 @@ define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED2-NEXT: ret void ; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]] -; AMDGPU-DISABLED-NEXT: ret void -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]] -; NVPTX-DISABLED-NEXT: ret void entry: call void @unknown() #11 ret void @@ -4234,25 +3255,6 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: ret void -; -; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper -; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: ret void -; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper -; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: ret void entry: %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 @@ -4280,7 +3282,6 @@ attributes #10 = { convergent "llvm.assume"="ompx_spmd_amenable" } attributes #11 = { convergent } !omp_offload.info = !{!0, !1, !2, !3, !4, !5} -!nvvm.annotations = !{!6, !7, !8, !9, !10, !11} !llvm.module.flags = !{!12, !13, !14, !15, !16} !llvm.ident = !{!17} @@ -4290,12 +3291,6 @@ attributes #11 = { convergent } !3 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} !4 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} !5 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -!6 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -!7 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -!8 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -!9 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -!10 = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -!11 = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} !12 = !{i32 1, !"wchar_size", i32 4} !13 = !{i32 7, !"openmp", i32 50} !14 = !{i32 7, !"openmp-device", i32 50} @@ -4317,92 +3312,6 @@ attributes #11 = { convergent } !30 = !{!31, !27, i64 0} !31 = !{!"kmp_task_t_with_privates", !32, i64 0} !32 = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32} -; AMDGPU-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR1]] = { norecurse } -; AMDGPU-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR4]] = { alwaysinline convergent nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR5]] = { nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) } -; AMDGPU-DISABLED: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; AMDGPU-DISABLED: attributes #[[ATTR9]] = { convergent } -; AMDGPU-DISABLED: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -; AMDGPU-DISABLED: attributes #[[ATTR11:[0-9]+]] = { alwaysinline } -; AMDGPU-DISABLED: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind } -; NVPTX-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } -; NVPTX-DISABLED: attributes #[[ATTR1]] = { norecurse } -; NVPTX-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind } -; NVPTX-DISABLED: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind } -; NVPTX-DISABLED: attributes #[[ATTR4]] = { alwaysinline convergent nounwind } -; NVPTX-DISABLED: attributes #[[ATTR5]] = { nounwind } -; NVPTX-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind } -; NVPTX-DISABLED: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) } -; NVPTX-DISABLED: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; NVPTX-DISABLED: attributes #[[ATTR9]] = { convergent } -; NVPTX-DISABLED: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -; NVPTX-DISABLED: attributes #[[ATTR11:[0-9]+]] = { alwaysinline } -; NVPTX-DISABLED: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind } -; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} -; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} -; AMDGPU-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0} -; AMDGPU-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} -; AMDGPU-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} -; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; AMDGPU-DISABLED: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; AMDGPU-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; AMDGPU-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"} -; AMDGPU-DISABLED: [[TBAA18]] = !{!19, !19, i64 0} -; AMDGPU-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0} -; AMDGPU-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0} -; AMDGPU-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"} -; AMDGPU-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24} -; AMDGPU-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"} -; AMDGPU-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"} -; AMDGPU-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24} -; AMDGPU-DISABLED: [[TBAA26]] = !{!27, !27, i64 0} -; AMDGPU-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0} -; AMDGPU-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24} -; AMDGPU-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24} -; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} -; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} -; NVPTX-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0} -; NVPTX-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} -; NVPTX-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} -; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; NVPTX-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; NVPTX-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; NVPTX-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; NVPTX-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; NVPTX-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; NVPTX-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} -; NVPTX-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; NVPTX-DISABLED: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; NVPTX-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; NVPTX-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"} -; NVPTX-DISABLED: [[TBAA18]] = !{!19, !19, i64 0} -; NVPTX-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0} -; NVPTX-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0} -; NVPTX-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"} -; NVPTX-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24} -; NVPTX-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"} -; NVPTX-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"} -; NVPTX-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24} -; NVPTX-DISABLED: [[TBAA26]] = !{!27, !27, i64 0} -; NVPTX-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0} -; NVPTX-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24} -; NVPTX-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24} ;. ; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" } ; AMDGPU: attributes #[[ATTR1]] = { norecurse } @@ -4488,30 +3397,24 @@ attributes #11 = { convergent } ; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} ; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} ; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; AMDGPU: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; AMDGPU: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; AMDGPU: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; AMDGPU: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; AMDGPU: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; AMDGPU: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} -; AMDGPU: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; AMDGPU: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; AMDGPU: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; AMDGPU: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; AMDGPU: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} -; AMDGPU: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0} -; AMDGPU: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0} -; AMDGPU: [[META21]] = !{!"Simple C/C++ TBAA"} -; AMDGPU: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]} -; AMDGPU: [[META23]] = !{!"llvm.loop.mustprogress"} -; AMDGPU: [[META24]] = !{!"llvm.loop.unroll.disable"} -; AMDGPU: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]} -; AMDGPU: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0} -; AMDGPU: [[META27]] = !{!"any pointer", [[META20]], i64 0} -; AMDGPU: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]} -; AMDGPU: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]} +; AMDGPU: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; AMDGPU: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; AMDGPU: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; AMDGPU: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; AMDGPU: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; AMDGPU: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +; AMDGPU: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0} +; AMDGPU: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0} +; AMDGPU: [[META15]] = !{!"Simple C/C++ TBAA"} +; AMDGPU: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]} +; AMDGPU: [[META17]] = !{!"llvm.loop.mustprogress"} +; AMDGPU: [[META18]] = !{!"llvm.loop.unroll.disable"} +; AMDGPU: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]} +; AMDGPU: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +; AMDGPU: [[META21]] = !{!"any pointer", [[META14]], i64 0} +; AMDGPU: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]} +; AMDGPU: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} ;. ; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} ; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} @@ -4519,30 +3422,24 @@ attributes #11 = { convergent } ; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} ; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} ; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; NVPTX: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; NVPTX: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; NVPTX: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; NVPTX: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; NVPTX: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; NVPTX: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} -; NVPTX: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; NVPTX: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; NVPTX: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; NVPTX: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; NVPTX: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} -; NVPTX: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0} -; NVPTX: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0} -; NVPTX: [[META21]] = !{!"Simple C/C++ TBAA"} -; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]} -; NVPTX: [[META23]] = !{!"llvm.loop.mustprogress"} -; NVPTX: [[META24]] = !{!"llvm.loop.unroll.disable"} -; NVPTX: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]} -; NVPTX: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0} -; NVPTX: [[META27]] = !{!"any pointer", [[META20]], i64 0} -; NVPTX: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]} -; NVPTX: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]} +; NVPTX: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; NVPTX: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; NVPTX: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; NVPTX: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; NVPTX: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; NVPTX: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +; NVPTX: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0} +; NVPTX: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0} +; NVPTX: [[META15]] = !{!"Simple C/C++ TBAA"} +; NVPTX: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]} +; NVPTX: [[META17]] = !{!"llvm.loop.mustprogress"} +; NVPTX: [[META18]] = !{!"llvm.loop.unroll.disable"} +; NVPTX: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]} +; NVPTX: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +; NVPTX: [[META21]] = !{!"any pointer", [[META14]], i64 0} +; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]} +; NVPTX: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} ;. ; AMDGPU-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} ; AMDGPU-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} @@ -4550,30 +3447,24 @@ attributes #11 = { convergent } ; AMDGPU-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} ; AMDGPU-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} ; AMDGPU-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; AMDGPU-DISABLED1: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; AMDGPU-DISABLED1: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; AMDGPU-DISABLED1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; AMDGPU-DISABLED1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; AMDGPU-DISABLED1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; AMDGPU-DISABLED1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} -; AMDGPU-DISABLED1: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU-DISABLED1: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU-DISABLED1: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; AMDGPU-DISABLED1: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; AMDGPU-DISABLED1: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; AMDGPU-DISABLED1: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; AMDGPU-DISABLED1: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} -; AMDGPU-DISABLED1: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0} -; AMDGPU-DISABLED1: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0} -; AMDGPU-DISABLED1: [[META21]] = !{!"Simple C/C++ TBAA"} -; AMDGPU-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]} -; AMDGPU-DISABLED1: [[META23]] = !{!"llvm.loop.mustprogress"} -; AMDGPU-DISABLED1: [[META24]] = !{!"llvm.loop.unroll.disable"} -; AMDGPU-DISABLED1: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]} -; AMDGPU-DISABLED1: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0} -; AMDGPU-DISABLED1: [[META27]] = !{!"any pointer", [[META20]], i64 0} -; AMDGPU-DISABLED1: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]} -; AMDGPU-DISABLED1: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]} +; AMDGPU-DISABLED1: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; AMDGPU-DISABLED1: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; AMDGPU-DISABLED1: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; AMDGPU-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; AMDGPU-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; AMDGPU-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; AMDGPU-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +; AMDGPU-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0} +; AMDGPU-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0} +; AMDGPU-DISABLED1: [[META15]] = !{!"Simple C/C++ TBAA"} +; AMDGPU-DISABLED1: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]} +; AMDGPU-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"} +; AMDGPU-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"} +; AMDGPU-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]} +; AMDGPU-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +; AMDGPU-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0} +; AMDGPU-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]} +; AMDGPU-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} ;. ; AMDGPU-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} ; AMDGPU-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} @@ -4581,30 +3472,24 @@ attributes #11 = { convergent } ; AMDGPU-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} ; AMDGPU-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} ; AMDGPU-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; AMDGPU-DISABLED2: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; AMDGPU-DISABLED2: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; AMDGPU-DISABLED2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; AMDGPU-DISABLED2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; AMDGPU-DISABLED2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; AMDGPU-DISABLED2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} -; AMDGPU-DISABLED2: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU-DISABLED2: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU-DISABLED2: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; AMDGPU-DISABLED2: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; AMDGPU-DISABLED2: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; AMDGPU-DISABLED2: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; AMDGPU-DISABLED2: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} -; AMDGPU-DISABLED2: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0} -; AMDGPU-DISABLED2: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0} -; AMDGPU-DISABLED2: [[META21]] = !{!"Simple C/C++ TBAA"} -; AMDGPU-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]} -; AMDGPU-DISABLED2: [[META23]] = !{!"llvm.loop.mustprogress"} -; AMDGPU-DISABLED2: [[META24]] = !{!"llvm.loop.unroll.disable"} -; AMDGPU-DISABLED2: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]} -; AMDGPU-DISABLED2: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0} -; AMDGPU-DISABLED2: [[META27]] = !{!"any pointer", [[META20]], i64 0} -; AMDGPU-DISABLED2: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]} -; AMDGPU-DISABLED2: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]} +; AMDGPU-DISABLED2: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; AMDGPU-DISABLED2: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; AMDGPU-DISABLED2: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; AMDGPU-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; AMDGPU-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; AMDGPU-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; AMDGPU-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +; AMDGPU-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0} +; AMDGPU-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0} +; AMDGPU-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"} +; AMDGPU-DISABLED2: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]} +; AMDGPU-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"} +; AMDGPU-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"} +; AMDGPU-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]} +; AMDGPU-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +; AMDGPU-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0} +; AMDGPU-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]} +; AMDGPU-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} ;. ; NVPTX-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} ; NVPTX-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} @@ -4612,30 +3497,24 @@ attributes #11 = { convergent } ; NVPTX-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} ; NVPTX-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} ; NVPTX-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; NVPTX-DISABLED1: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; NVPTX-DISABLED1: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; NVPTX-DISABLED1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; NVPTX-DISABLED1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; NVPTX-DISABLED1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; NVPTX-DISABLED1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} -; NVPTX-DISABLED1: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX-DISABLED1: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX-DISABLED1: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; NVPTX-DISABLED1: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; NVPTX-DISABLED1: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; NVPTX-DISABLED1: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; NVPTX-DISABLED1: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} -; NVPTX-DISABLED1: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0} -; NVPTX-DISABLED1: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0} -; NVPTX-DISABLED1: [[META21]] = !{!"Simple C/C++ TBAA"} -; NVPTX-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]} -; NVPTX-DISABLED1: [[META23]] = !{!"llvm.loop.mustprogress"} -; NVPTX-DISABLED1: [[META24]] = !{!"llvm.loop.unroll.disable"} -; NVPTX-DISABLED1: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]} -; NVPTX-DISABLED1: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0} -; NVPTX-DISABLED1: [[META27]] = !{!"any pointer", [[META20]], i64 0} -; NVPTX-DISABLED1: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]} -; NVPTX-DISABLED1: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]} +; NVPTX-DISABLED1: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; NVPTX-DISABLED1: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; NVPTX-DISABLED1: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; NVPTX-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; NVPTX-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; NVPTX-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; NVPTX-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +; NVPTX-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0} +; NVPTX-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0} +; NVPTX-DISABLED1: [[META15]] = !{!"Simple C/C++ TBAA"} +; NVPTX-DISABLED1: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]} +; NVPTX-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"} +; NVPTX-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"} +; NVPTX-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]} +; NVPTX-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +; NVPTX-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0} +; NVPTX-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]} +; NVPTX-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} ;. ; NVPTX-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} ; NVPTX-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} @@ -4643,28 +3522,22 @@ attributes #11 = { convergent } ; NVPTX-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} ; NVPTX-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} ; NVPTX-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; NVPTX-DISABLED2: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; NVPTX-DISABLED2: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; NVPTX-DISABLED2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; NVPTX-DISABLED2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; NVPTX-DISABLED2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; NVPTX-DISABLED2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} -; NVPTX-DISABLED2: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX-DISABLED2: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX-DISABLED2: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; NVPTX-DISABLED2: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; NVPTX-DISABLED2: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; NVPTX-DISABLED2: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; NVPTX-DISABLED2: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} -; NVPTX-DISABLED2: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0} -; NVPTX-DISABLED2: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0} -; NVPTX-DISABLED2: [[META21]] = !{!"Simple C/C++ TBAA"} -; NVPTX-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]} -; NVPTX-DISABLED2: [[META23]] = !{!"llvm.loop.mustprogress"} -; NVPTX-DISABLED2: [[META24]] = !{!"llvm.loop.unroll.disable"} -; NVPTX-DISABLED2: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]} -; NVPTX-DISABLED2: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0} -; NVPTX-DISABLED2: [[META27]] = !{!"any pointer", [[META20]], i64 0} -; NVPTX-DISABLED2: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]} -; NVPTX-DISABLED2: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]} +; NVPTX-DISABLED2: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; NVPTX-DISABLED2: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; NVPTX-DISABLED2: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; NVPTX-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; NVPTX-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; NVPTX-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; NVPTX-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +; NVPTX-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0} +; NVPTX-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0} +; NVPTX-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"} +; NVPTX-DISABLED2: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]} +; NVPTX-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"} +; NVPTX-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"} +; NVPTX-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]} +; NVPTX-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +; NVPTX-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0} +; NVPTX-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]} +; NVPTX-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} ;. diff --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll index 2f43a4e4286a23..99715cf5b4032c 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll @@ -28,7 +28,7 @@ target triple = "nvptx64" ; CHECK: @__omp_offloading_fd02_404433c2_main_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 ;. -define weak void @__omp_offloading_fd02_404433c2_main_l5(ptr %dyn, ptr nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 { +define weak ptx_kernel void @__omp_offloading_fd02_404433c2_main_l5(ptr %dyn, ptr nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_404433c2_main_l5 ; CHECK-SAME: (ptr [[DYN:%.*]], ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -47,7 +47,7 @@ define weak void @__omp_offloading_fd02_404433c2_main_l5(ptr %dyn, ptr nonnull a ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] ; CHECK: region.guarded: -; CHECK-NEXT: store double [[CALL_I]], ptr [[X]], align 8, !tbaa [[TBAA8:![0-9]+]] +; CHECK-NEXT: store double [[CALL_I]], ptr [[X]], align 8, !tbaa [[TBAA7:![0-9]+]] ; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]] ; CHECK: region.guarded.end: ; CHECK-NEXT: br label [[REGION_BARRIER]] @@ -127,12 +127,10 @@ attributes #5 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "st attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" } !omp_offload.info = !{!0} -!nvvm.annotations = !{!1} !llvm.module.flags = !{!2, !3, !4, !5, !6} !llvm.ident = !{!7} !0 = !{i32 0, i32 64770, i32 1078211522, !"main", i32 5, i32 0} -!1 = !{ptr @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1} !2 = !{i32 1, !"wchar_size", i32 4} !3 = !{i32 7, !"openmp", i32 50} !4 = !{i32 7, !"openmp-device", i32 50} @@ -154,15 +152,14 @@ attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" } ; CHECK: attributes #[[ATTR7]] = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 1078211522, !"main", i32 5, i32 0} -; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1} -; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; CHECK: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} -; CHECK: [[META9]] = !{!"double", [[META10:![0-9]+]], i64 0} -; CHECK: [[META10]] = !{!"omnipotent char", [[META11:![0-9]+]], i64 0} -; CHECK: [[META11]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; CHECK: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK: [[META4:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; CHECK: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; CHECK: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +; CHECK: [[META8]] = !{!"double", [[META9:![0-9]+]], i64 0} +; CHECK: [[META9]] = !{!"omnipotent char", [[META10:![0-9]+]], i64 0} +; CHECK: [[META10]] = !{!"Simple C/C++ TBAA"} ;. diff --git a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll index 75e01f3295fe23..953ecb2ddd8a66 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll @@ -297,12 +297,10 @@ attributes #14 = { convergent nounwind "llvm.assume"="ompx_aligned_barrier,ompx_ attributes #15 = { convergent nounwind } !omp_offload.info = !{!0} -!nvvm.annotations = !{!1} !llvm.module.flags = !{!2, !3, !4, !5} !llvm.ident = !{!6} !0 = !{i32 0, i32 32, i32 18757968, !"main", i32 12, i32 0} -!1 = !{ptr @__omp_offloading_20_11e3950_main_l12, !"kernel", i32 1} !2 = !{i32 1, !"wchar_size", i32 4} !3 = !{i32 7, !"openmp", i32 50} !4 = !{i32 7, !"openmp-device", i32 50} diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll index 229a49d7845593..bbf1de253de92a 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll @@ -58,7 +58,7 @@ target triple = "nvptx64" ; CHECK-DISABLED: @__omp_offloading_2a_fbfa7a_sequential_loop_l6_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ; CHECK-DISABLED: @__omp_outlined__1_wrapper.ID = private constant i8 undef ;. -define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x, i64 %N) #0 { +define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x, i64 %N) #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2a_fbfa7a_sequential_loop_l6 ; CHECK-SAME: (ptr [[DYN:%.*]], ptr [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -84,9 +84,9 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] ; CHECK: region.guarded: -; CHECK-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META8:![0-9]+]] -; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META8]] -; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META8]] +; CHECK-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]] +; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]] +; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]] ; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]] ; CHECK: region.guarded.end: ; CHECK-NEXT: br label [[REGION_BARRIER]] @@ -111,7 +111,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]] ; CHECK: region.guarded4: -; CHECK-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META8]] +; CHECK-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]] ; CHECK-NEXT: br label [[REGION_GUARDED_END1:%.*]] ; CHECK: region.guarded.end1: ; CHECK-NEXT: br label [[REGION_BARRIER2]] @@ -120,10 +120,10 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x ; CHECK-NEXT: br label [[REGION_EXIT3]] ; CHECK: region.exit3: ; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -; CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: __omp_outlined__.exit: ; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr null, i64 0) -; CHECK-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META8]] +; CHECK-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META7]] ; CHECK-NEXT: [[IDXPROM6_I:%.*]] = sext i32 [[CALL_I]] to i64 ; CHECK-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]] ; CHECK-NEXT: br label [[REGION_CHECK_TID10:%.*]] @@ -132,7 +132,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]] ; CHECK: region.guarded9: -; CHECK-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META8]] +; CHECK-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]] ; CHECK-NEXT: br label [[REGION_GUARDED_END6:%.*]] ; CHECK: region.guarded.end6: ; CHECK-NEXT: br label [[REGION_BARRIER7]] @@ -140,7 +140,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6]]) ; CHECK-NEXT: br label [[REGION_EXIT8:%.*]] ; CHECK: region.exit8: -; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]] +; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]] ; CHECK-NEXT: [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64 ; CHECK-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]] ; CHECK-NEXT: br label [[REGION_CHECK_TID15:%.*]] @@ -149,7 +149,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 ; CHECK-NEXT: br i1 [[TMP9]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]] ; CHECK: region.guarded14: -; CHECK-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META8]] +; CHECK-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]] ; CHECK-NEXT: br label [[REGION_GUARDED_END11:%.*]] ; CHECK: region.guarded.end11: ; CHECK-NEXT: br label [[REGION_BARRIER12]] @@ -157,7 +157,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP8]]) ; CHECK-NEXT: br label [[REGION_EXIT13:%.*]] ; CHECK: region.exit13: -; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]] +; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]] ; CHECK-NEXT: [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64 ; CHECK-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]] ; CHECK-NEXT: br label [[REGION_CHECK_TID20:%.*]] @@ -166,7 +166,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0 ; CHECK-NEXT: br i1 [[TMP11]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]] ; CHECK: region.guarded19: -; CHECK-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META8]] +; CHECK-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]] ; CHECK-NEXT: br label [[REGION_GUARDED_END16:%.*]] ; CHECK: region.guarded.end16: ; CHECK-NEXT: br label [[REGION_BARRIER17]] @@ -174,9 +174,9 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP10]]) ; CHECK-NEXT: br label [[REGION_EXIT18:%.*]] ; CHECK: region.exit18: -; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]] -; CHECK-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]] -; CHECK-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]] +; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]] +; CHECK-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]] +; CHECK-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]] ; CHECK-NEXT: call void @__kmpc_target_deinit() #[[ATTR6]] ; CHECK-NEXT: ret void ; CHECK: worker.exit: @@ -230,13 +230,13 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x ; CHECK-DISABLED-NEXT: [[SELECT:%.*]] = select i1 [[C]], ptr [[AL32]], ptr addrspacecast (ptr addrspace(5) @LocGlob to ptr) ; CHECK-DISABLED-NEXT: store ptr [[SELECT]], ptr [[LOC]], align 8 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]] -; CHECK-DISABLED-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META8:![0-9]+]] +; CHECK-DISABLED-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]] ; CHECK-DISABLED-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1 -; CHECK-DISABLED-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META8]] +; CHECK-DISABLED-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]] ; CHECK-DISABLED-NEXT: [[SEXT:%.*]] = shl i64 [[N]], 32 ; CHECK-DISABLED-NEXT: [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32 ; CHECK-DISABLED-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]] -; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META8]] +; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]] ; CHECK-DISABLED-NEXT: call void @usei8ptr(ptr nocapture [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]] ; CHECK-DISABLED-NEXT: br label [[FOR_COND_I:%.*]] ; CHECK-DISABLED: for.cond.i: @@ -248,26 +248,26 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x ; CHECK-DISABLED-NEXT: [[SUB3_I:%.*]] = add nsw i32 [[I_0_I]], -1 ; CHECK-DISABLED-NEXT: [[IDXPROM4_I:%.*]] = zext i32 [[I_0_I]] to i64 ; CHECK-DISABLED-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]] -; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META8]] +; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]] ; CHECK-DISABLED-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -; CHECK-DISABLED-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-DISABLED-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-DISABLED: __omp_outlined__.exit: ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr null, i64 0) -; CHECK-DISABLED-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META8]] +; CHECK-DISABLED-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META7]] ; CHECK-DISABLED-NEXT: [[IDXPROM6_I:%.*]] = sext i32 [[CALL_I]] to i64 ; CHECK-DISABLED-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]] -; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META8]] -; CHECK-DISABLED-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]] +; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]] +; CHECK-DISABLED-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]] ; CHECK-DISABLED-NEXT: [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64 ; CHECK-DISABLED-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]] -; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META8]] -; CHECK-DISABLED-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]] +; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]] +; CHECK-DISABLED-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]] ; CHECK-DISABLED-NEXT: [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64 ; CHECK-DISABLED-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]] -; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META8]] -; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]] -; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]] -; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]] +; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]] +; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]] +; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]] +; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit() #[[ATTR6]] ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: @@ -404,12 +404,10 @@ attributes #4 = { inaccessiblememonly nofree nosync nounwind willreturn } attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" } !omp_offload.info = !{!0} -!nvvm.annotations = !{!1} !llvm.module.flags = !{!2, !3, !4, !5, !6} !llvm.ident = !{!7} !0 = !{i32 0, i32 42, i32 16513658, !"sequential_loop", i32 6, i32 0} -!1 = !{ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1} !2 = !{i32 1, !"wchar_size", i32 4} !3 = !{i32 7, !"openmp", i32 50} !4 = !{i32 7, !"openmp-device", i32 50} @@ -447,30 +445,28 @@ attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_ame ; CHECK-DISABLED: attributes #[[ATTR10]] = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 42, i32 16513658, !"sequential_loop", i32 6, i32 0} -; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1} -; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; CHECK: [[META8]] = !{[[META9:![0-9]+]]} -; CHECK: [[META9]] = distinct !{[[META9]], [[META10:![0-9]+]], !"__omp_outlined__: %__context"} -; CHECK: [[META10]] = distinct !{[[META10]], !"__omp_outlined__"} -; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]]} -; CHECK: [[META12]] = !{!"llvm.loop.mustprogress"} +; CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; CHECK: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK: [[META4:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; CHECK: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; CHECK: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; CHECK: [[META7]] = !{[[META8:![0-9]+]]} +; CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]], !"__omp_outlined__: %__context"} +; CHECK: [[META9]] = distinct !{[[META9]], !"__omp_outlined__"} +; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META11:![0-9]+]]} +; CHECK: [[META11]] = !{!"llvm.loop.mustprogress"} ;. ; CHECK-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 42, i32 16513658, !"sequential_loop", i32 6, i32 0} -; CHECK-DISABLED: [[META1:![0-9]+]] = !{ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1} -; CHECK-DISABLED: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; CHECK-DISABLED: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK-DISABLED: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; CHECK-DISABLED: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; CHECK-DISABLED: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; CHECK-DISABLED: [[META8]] = !{[[META9:![0-9]+]]} -; CHECK-DISABLED: [[META9]] = distinct !{[[META9]], [[META10:![0-9]+]], !"__omp_outlined__: %__context"} -; CHECK-DISABLED: [[META10]] = distinct !{[[META10]], !"__omp_outlined__"} -; CHECK-DISABLED: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]]} -; CHECK-DISABLED: [[META12]] = !{!"llvm.loop.mustprogress"} +; CHECK-DISABLED: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; CHECK-DISABLED: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK-DISABLED: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; CHECK-DISABLED: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; CHECK-DISABLED: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; CHECK-DISABLED: [[META7]] = !{[[META8:![0-9]+]]} +; CHECK-DISABLED: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]], !"__omp_outlined__: %__context"} +; CHECK-DISABLED: [[META9]] = distinct !{[[META9]], !"__omp_outlined__"} +; CHECK-DISABLED: [[LOOP10]] = distinct !{[[LOOP10]], [[META11:![0-9]+]]} +; CHECK-DISABLED: [[META11]] = !{!"llvm.loop.mustprogress"} ;. diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll index 11405b7eb447c2..a644fe1b2f8217 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll @@ -56,7 +56,7 @@ target triple = "nvptx64" ; CHECK-DISABLE-SPMDIZATION: @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ; CHECK-DISABLE-SPMDIZATION: @__omp_offloading_2b_10393b5_generic_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ;. -define weak void @__omp_offloading_2b_10393b5_spmd_l12(ptr %dyn) "kernel" #0 { +define weak ptx_kernel void @__omp_offloading_2b_10393b5_spmd_l12(ptr %dyn) "kernel" #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12 ; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -113,7 +113,7 @@ define weak i32 @__kmpc_target_init(ptr, ptr) { declare void @__kmpc_target_deinit() ; Function Attrs: convergent noinline norecurse nounwind -define weak void @__omp_offloading_2b_10393b5_generic_l20(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_2b_10393b5_generic_l20(ptr %dyn) #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20 ; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -321,14 +321,12 @@ attributes #4 = { alwaysinline } attributes #5 = { convergent } !omp_offload.info = !{!0, !1} -!nvvm.annotations = !{!2, !3} !llvm.module.flags = !{!4, !5, !6, !7, !8} !llvm.ident = !{!9} !0 = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0} !1 = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1} -!2 = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1} -!3 = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1} + !4 = !{i32 1, !"wchar_size", i32 4} !5 = !{i32 7, !"openmp", i32 50} !6 = !{i32 7, !"openmp-device", i32 50} @@ -358,23 +356,19 @@ attributes #5 = { convergent } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0} ; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1} -; CHECK: [[META2:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1} -; CHECK: [[META3:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1} -; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META7:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; CHECK: [[META8:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; CHECK: [[META9:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} ;. ; CHECK-DISABLE-SPMDIZATION: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0} ; CHECK-DISABLE-SPMDIZATION: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1} -; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1} -; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1} -; CHECK-DISABLE-SPMDIZATION: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; CHECK-DISABLE-SPMDIZATION: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; CHECK-DISABLE-SPMDIZATION: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK-DISABLE-SPMDIZATION: [[META7:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; CHECK-DISABLE-SPMDIZATION: [[META8:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; CHECK-DISABLE-SPMDIZATION: [[META9:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK-DISABLE-SPMDIZATION: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK-DISABLE-SPMDIZATION: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; CHECK-DISABLE-SPMDIZATION: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; CHECK-DISABLE-SPMDIZATION: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} ;. diff --git a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll index f348825446c63d..6dfc14e9270ed2 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll @@ -28,7 +28,7 @@ ; NVPTX: @spmd_and_non_spmd_callees_metadata_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ; NVPTX: @spmd_and_non_spmd_callee_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ;. -define weak void @spmd_callees(i1 %c) #0 { +define weak ptx_kernel void @spmd_callees(i1 %c) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@spmd_callees ; AMDGPU-SAME: (i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; AMDGPU-NEXT: call void @spmd_callees__debug(i1 [[C]]) @@ -57,7 +57,7 @@ define internal void @spmd_callees__debug(i1 %c) { ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10:[0-9]+]] ; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17:![0-9]+]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] ; AMDGPU-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable1, ptr @__omp_outlined_spmd_amenable2 ; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable2 ; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] @@ -88,7 +88,7 @@ define internal void @spmd_callees__debug(i1 %c) { ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10:[0-9]+]] ; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17:![0-9]+]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] ; NVPTX-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable1, ptr @__omp_outlined_spmd_amenable2 ; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable2 ; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] @@ -143,10 +143,10 @@ define internal void @__omp_outlined_spmd_amenable1(ptr noalias %.global_tid., p ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable1 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { @@ -161,10 +161,10 @@ define internal void @__omp_outlined_spmd_amenable1(ptr noalias %.global_tid., p ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; entry: %captured_vars_addrs = alloca [0 x ptr], align 8 @@ -262,10 +262,10 @@ define internal void @__omp_outlined_spmd_amenable2(ptr noalias %.global_tid., p ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable2 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { @@ -282,10 +282,10 @@ define internal void @__omp_outlined_spmd_amenable2(ptr noalias %.global_tid., p ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] ; entry: %captured_vars_addrs = alloca [0 x ptr], align 8 @@ -367,7 +367,7 @@ entry: ; Function Attrs: alwaysinline convergent norecurse nounwind -define weak void @spmd_and_non_spmd_callee(i1 %c) #0 { +define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 { ; ; ; AMDGPU-LABEL: define {{[^@]+}}@spmd_and_non_spmd_callee @@ -413,7 +413,7 @@ define weak void @spmd_and_non_spmd_callee(i1 %c) #0 { ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]] ; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable3, ptr @__omp_outlined_not_spmd_amenable ; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable ; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] @@ -473,7 +473,7 @@ define weak void @spmd_and_non_spmd_callee(i1 %c) #0 { ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]] ; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable3, ptr @__omp_outlined_not_spmd_amenable ; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable ; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] @@ -530,11 +530,11 @@ define internal void @__omp_outlined_spmd_amenable3(ptr noalias %.global_tid., p ; AMDGPU-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR10]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: -; AMDGPU-NEXT: store ptr [[X]], ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA25:![0-9]+]] -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]] +; AMDGPU-NEXT: store ptr [[X]], ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable3 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { @@ -551,11 +551,11 @@ define internal void @__omp_outlined_spmd_amenable3(ptr noalias %.global_tid., p ; NVPTX-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR10]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: -; NVPTX-NEXT: store ptr [[X]], ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA25:![0-9]+]] -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]] +; NVPTX-NEXT: store ptr [[X]], ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; entry: %captured_vars_addrs = alloca [1 x ptr], align 8 @@ -587,18 +587,18 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA17]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA17]] +; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA17]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA17]] +; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: call void @unknown() #[[ATTR7]] ; NVPTX-NEXT: ret void ; @@ -622,7 +622,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA25]] +; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] ; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -634,7 +634,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA25]] +; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] ; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -652,7 +652,7 @@ entry: } ; Function Attrs: alwaysinline convergent norecurse nounwind -define weak void @spmd_callees_metadata(ptr %fp) #0 { +define weak ptx_kernel void @spmd_callees_metadata(ptr %fp) #0 { ; ; ; AMDGPU-LABEL: define {{[^@]+}}@spmd_callees_metadata @@ -668,7 +668,7 @@ define weak void @spmd_callees_metadata(ptr %fp) #0 { ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]] ; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) ; AMDGPU-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-NEXT: br label [[COMMON_RET]] @@ -686,7 +686,7 @@ define weak void @spmd_callees_metadata(ptr %fp) #0 { ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]] ; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) ; NVPTX-NEXT: call void @__kmpc_target_deinit() ; NVPTX-NEXT: br label [[COMMON_RET]] @@ -711,7 +711,7 @@ user_code.entry: ; preds = %entry } ; Function Attrs: alwaysinline convergent norecurse nounwind -define weak void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 { +define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 { ; ; ; AMDGPU-LABEL: define {{[^@]+}}@spmd_and_non_spmd_callees_metadata @@ -757,7 +757,7 @@ define weak void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 { ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]] ; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external ; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] ; AMDGPU: 3: @@ -816,7 +816,7 @@ define weak void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 { ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]] ; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external ; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] ; NVPTX: 3: @@ -868,10 +868,10 @@ define void @__omp_outlined_spmd_amenable_external(ptr noalias %.global_tid., pt ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr undef, i64 0) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable_external ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { @@ -885,10 +885,10 @@ define void @__omp_outlined_spmd_amenable_external(ptr noalias %.global_tid., pt ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr undef, i64 0) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; entry: br label %for.cond @@ -1069,7 +1069,6 @@ attributes #10 = { convergent "llvm.assume"="ompx_spmd_amenable" } attributes #11 = { convergent } !omp_offload.info = !{!0, !1, !2, !3, !4, !5} -!nvvm.annotations = !{!6, !7, !8, !9, !10, !11} !llvm.module.flags = !{!12, !13, !14, !15, !16} !llvm.ident = !{!17} @@ -1079,12 +1078,6 @@ attributes #11 = { convergent } !3 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} !4 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} !5 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -!6 = !{ptr @spmd_callees, !"kernel", i32 1} -!7 = !{ptr @spmd_and_non_spmd_callees_metadata, !"kernel", i32 1} -!8 = !{ptr @spmd_and_non_spmd_callee, !"kernel", i32 1} -!9 = !{ptr @spmd_callees_metadata, !"kernel", i32 1} -!10 = !{i32 1} -!11 = !{i32 1} !12 = !{i32 1, !"wchar_size", i32 4} !13 = !{i32 7, !"openmp", i32 50} !14 = !{i32 7, !"openmp-device", i32 50} @@ -1139,29 +1132,24 @@ attributes #11 = { convergent } ; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} ; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} ; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; AMDGPU: [[META6:![0-9]+]] = !{ptr @spmd_callees, !"kernel", i32 1} -; AMDGPU: [[META7:![0-9]+]] = !{ptr @spmd_and_non_spmd_callees_metadata, !"kernel", i32 1} -; AMDGPU: [[META8:![0-9]+]] = !{ptr @spmd_and_non_spmd_callee, !"kernel", i32 1} -; AMDGPU: [[META9:![0-9]+]] = !{ptr @spmd_callees_metadata, !"kernel", i32 1} -; AMDGPU: [[META10:![0-9]+]] = !{i32 1} -; AMDGPU: [[META11:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU: [[META12:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU: [[META13:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; AMDGPU: [[META14:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; AMDGPU: [[META15:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; AMDGPU: [[META16:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; AMDGPU: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} -; AMDGPU: [[META18]] = !{!"int", [[META19:![0-9]+]], i64 0} -; AMDGPU: [[META19]] = !{!"omnipotent char", [[META20:![0-9]+]], i64 0} -; AMDGPU: [[META20]] = !{!"Simple C/C++ TBAA"} -; AMDGPU: [[LOOP21]] = distinct !{[[LOOP21]], [[META22:![0-9]+]], [[META23:![0-9]+]]} -; AMDGPU: [[META22]] = !{!"llvm.loop.mustprogress"} -; AMDGPU: [[META23]] = !{!"llvm.loop.unroll.disable"} -; AMDGPU: [[LOOP24]] = distinct !{[[LOOP24]], [[META22]], [[META23]]} -; AMDGPU: [[TBAA25]] = !{[[META26:![0-9]+]], [[META26]], i64 0} -; AMDGPU: [[META26]] = !{!"any pointer", [[META19]], i64 0} -; AMDGPU: [[LOOP27]] = distinct !{[[LOOP27]], [[META22]], [[META23]]} -; AMDGPU: [[LOOP28]] = distinct !{[[LOOP28]], [[META22]], [[META23]]} +; AMDGPU: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; AMDGPU: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; AMDGPU: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; AMDGPU: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; AMDGPU: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; AMDGPU: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +; AMDGPU: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0} +; AMDGPU: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0} +; AMDGPU: [[META15]] = !{!"Simple C/C++ TBAA"} +; AMDGPU: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]} +; AMDGPU: [[META17]] = !{!"llvm.loop.mustprogress"} +; AMDGPU: [[META18]] = !{!"llvm.loop.unroll.disable"} +; AMDGPU: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]} +; AMDGPU: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +; AMDGPU: [[META21]] = !{!"any pointer", [[META14]], i64 0} +; AMDGPU: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]} +; AMDGPU: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} ;. ; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"", i32 74, i32 5} ; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} @@ -1169,27 +1157,22 @@ attributes #11 = { convergent } ; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} ; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} ; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; NVPTX: [[META6:![0-9]+]] = !{ptr @spmd_callees, !"kernel", i32 1} -; NVPTX: [[META7:![0-9]+]] = !{ptr @spmd_and_non_spmd_callees_metadata, !"kernel", i32 1} -; NVPTX: [[META8:![0-9]+]] = !{ptr @spmd_and_non_spmd_callee, !"kernel", i32 1} -; NVPTX: [[META9:![0-9]+]] = !{ptr @spmd_callees_metadata, !"kernel", i32 1} -; NVPTX: [[META10:![0-9]+]] = !{i32 1} -; NVPTX: [[META11:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX: [[META12:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX: [[META13:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; NVPTX: [[META14:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; NVPTX: [[META15:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; NVPTX: [[META16:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; NVPTX: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} -; NVPTX: [[META18]] = !{!"int", [[META19:![0-9]+]], i64 0} -; NVPTX: [[META19]] = !{!"omnipotent char", [[META20:![0-9]+]], i64 0} -; NVPTX: [[META20]] = !{!"Simple C/C++ TBAA"} -; NVPTX: [[LOOP21]] = distinct !{[[LOOP21]], [[META22:![0-9]+]], [[META23:![0-9]+]]} -; NVPTX: [[META22]] = !{!"llvm.loop.mustprogress"} -; NVPTX: [[META23]] = !{!"llvm.loop.unroll.disable"} -; NVPTX: [[LOOP24]] = distinct !{[[LOOP24]], [[META22]], [[META23]]} -; NVPTX: [[TBAA25]] = !{[[META26:![0-9]+]], [[META26]], i64 0} -; NVPTX: [[META26]] = !{!"any pointer", [[META19]], i64 0} -; NVPTX: [[LOOP27]] = distinct !{[[LOOP27]], [[META22]], [[META23]]} -; NVPTX: [[LOOP28]] = distinct !{[[LOOP28]], [[META22]], [[META23]]} +; NVPTX: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; NVPTX: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; NVPTX: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; NVPTX: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; NVPTX: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; NVPTX: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +; NVPTX: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0} +; NVPTX: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0} +; NVPTX: [[META15]] = !{!"Simple C/C++ TBAA"} +; NVPTX: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]} +; NVPTX: [[META17]] = !{!"llvm.loop.mustprogress"} +; NVPTX: [[META18]] = !{!"llvm.loop.unroll.disable"} +; NVPTX: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]} +; NVPTX: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +; NVPTX: [[META21]] = !{!"any pointer", [[META14]], i64 0} +; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]} +; NVPTX: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} ;. diff --git a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll index f28f61e053275c..1cfce147ac81ec 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll @@ -57,7 +57,7 @@ target triple = "nvptx64" ; CHECK-DISABLE-SPMDIZATION: @__omp_offloading_2b_10393b5_generic_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ; CHECK-DISABLE-SPMDIZATION: @__omp_outlined___wrapper.ID = private constant i8 undef ;. -define weak void @__omp_offloading_2b_10393b5_spmd_l12(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_2b_10393b5_spmd_l12(ptr %dyn) #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12 ; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -147,7 +147,7 @@ define weak i32 @__kmpc_target_init(ptr, ptr) { declare void @__kmpc_target_deinit() ; Function Attrs: convergent noinline norecurse nounwind -define weak void @__omp_offloading_2b_10393b5_generic_l20(ptr %dyn) #0 { +define weak ptx_kernel void @__omp_offloading_2b_10393b5_generic_l20(ptr %dyn) #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20 ; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -397,14 +397,11 @@ attributes #4 = { alwaysinline } attributes #5 = { convergent } !omp_offload.info = !{!0, !1} -!nvvm.annotations = !{!2, !3} !llvm.module.flags = !{!4, !5, !6, !7, !8} !llvm.ident = !{!9} !0 = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0} !1 = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1} -!2 = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1} -!3 = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1} !4 = !{i32 1, !"wchar_size", i32 4} !5 = !{i32 7, !"openmp", i32 50} !6 = !{i32 7, !"openmp-device", i32 50} @@ -434,23 +431,19 @@ attributes #5 = { convergent } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0} ; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1} -; CHECK: [[META2:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1} -; CHECK: [[META3:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1} -; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META7:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; CHECK: [[META8:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; CHECK: [[META9:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} ;. ; CHECK-DISABLE-SPMDIZATION: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0} ; CHECK-DISABLE-SPMDIZATION: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1} -; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1} -; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1} -; CHECK-DISABLE-SPMDIZATION: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; CHECK-DISABLE-SPMDIZATION: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; CHECK-DISABLE-SPMDIZATION: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK-DISABLE-SPMDIZATION: [[META7:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; CHECK-DISABLE-SPMDIZATION: [[META8:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; CHECK-DISABLE-SPMDIZATION: [[META9:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK-DISABLE-SPMDIZATION: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK-DISABLE-SPMDIZATION: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; CHECK-DISABLE-SPMDIZATION: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; CHECK-DISABLE-SPMDIZATION: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} ;. diff --git a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll b/llvm/test/Transforms/OpenMP/spmdization_remarks.ll index f5a4cea9a841cb..ef36937bc57348 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_remarks.ll @@ -62,7 +62,7 @@ target triple = "nvptx64" ; Function Attrs: convergent norecurse nounwind -define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11(ptr %dyn) local_unnamed_addr #0 !dbg !15 { +define weak ptx_kernel void @__omp_offloading_2a_d80d3d_test_fallback_l11(ptr %dyn) local_unnamed_addr #0 !dbg !15 { entry: %captured_vars_addrs.i.i = alloca [0 x ptr], align 8 %0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment, ptr %dyn) #3, !dbg !18 @@ -107,7 +107,7 @@ declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3 declare void @__kmpc_target_deinit() local_unnamed_addr ; Function Attrs: norecurse nounwind -define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20(ptr %dyn) local_unnamed_addr #4 !dbg !32 { +define weak ptx_kernel void @__omp_offloading_2a_d80d3d_test_no_fallback_l20(ptr %dyn) local_unnamed_addr #4 !dbg !32 { entry: %captured_vars_addrs.i2.i = alloca [0 x ptr], align 8 %0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment, ptr %dyn) #3, !dbg !33 @@ -175,7 +175,6 @@ attributes #7 = { "llvm.assume"="ompx_spmd_amenable" } !llvm.dbg.cu = !{!0} !omp_offload.info = !{!3, !4} -!nvvm.annotations = !{!5, !6} !llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13} !llvm.ident = !{!14} @@ -184,8 +183,6 @@ attributes #7 = { "llvm.assume"="ompx_spmd_amenable" } !2 = !{} !3 = !{i32 0, i32 42, i32 14159165, !"test_no_fallback", i32 20, i32 1} !4 = !{i32 0, i32 42, i32 14159165, !"test_fallback", i32 11, i32 0} -!5 = !{ptr @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1} -!6 = !{ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1} !7 = !{i32 7, !"Dwarf Version", i32 2} !8 = !{i32 2, !"Debug Info Version", i32 3} !9 = !{i32 1, !"wchar_size", i32 4} diff --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll index 5e2abbae1811c6..2842dfd030b114 100644 --- a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll +++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll @@ -53,7 +53,7 @@ target triple = "amdgcn-amd-amdhsa" ; CHECK: @str = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1 ; CHECK: @kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null } ;. -define void @kernel(ptr %dyn) "kernel" { +define amdgpu_kernel void @kernel(ptr %dyn) "kernel" { ; ; TUNIT: Function Attrs: norecurse ; TUNIT-LABEL: define {{[^@]+}}@kernel @@ -144,7 +144,7 @@ define void @test_assume() { } ; We can't ignore the sync, hence this might store 2 into %p -define void @kernel2(ptr %p) "kernel" { +define amdgpu_kernel void @kernel2(ptr %p) "kernel" { ; CHECK-LABEL: define {{[^@]+}}@kernel2 ; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: store i32 1, ptr addrspace(3) @X, align 4 @@ -163,7 +163,7 @@ define void @kernel2(ptr %p) "kernel" { } ; We can't ignore the sync, hence this might store 2 into %p -define void @kernel3(ptr %p) "kernel" { +define amdgpu_kernel void @kernel3(ptr %p) "kernel" { ; TUNIT-LABEL: define {{[^@]+}}@kernel3 ; TUNIT-SAME: (ptr [[P:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: store i32 1, ptr addrspace(3) @X, align 4 @@ -199,7 +199,7 @@ define void @sync_def() { ret void } -define void @kernel4a1(i1 %c) "kernel" { +define amdgpu_kernel void @kernel4a1(i1 %c) "kernel" { ; TUNIT-LABEL: define {{[^@]+}}@kernel4a1 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QA1, align 4 @@ -242,7 +242,7 @@ S: } ; We should not replace the load or delete the second store. -define void @kernel4b1(i1 %c) "kernel" { +define amdgpu_kernel void @kernel4b1(i1 %c) "kernel" { ; TUNIT-LABEL: define {{[^@]+}}@kernel4b1 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QB1, align 4 @@ -281,7 +281,7 @@ S: ret void } -define void @kernel4a2(i1 %c) "kernel" { +define amdgpu_kernel void @kernel4a2(i1 %c) "kernel" { ; TUNIT-LABEL: define {{[^@]+}}@kernel4a2 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]] @@ -317,7 +317,7 @@ S: } ; FIXME: We should not replace the load with undef. -define void @kernel4b2(i1 %c) "kernel" { +define amdgpu_kernel void @kernel4b2(i1 %c) "kernel" { ; TUNIT-LABEL: define {{[^@]+}}@kernel4b2 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]] @@ -349,7 +349,7 @@ S: ret void } -define void @kernel4a3(i1 %c) "kernel" { +define amdgpu_kernel void @kernel4a3(i1 %c) "kernel" { ; TUNIT-LABEL: define {{[^@]+}}@kernel4a3 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QA3, align 4 @@ -401,7 +401,7 @@ S: } ; The load of QB3 should not be simplified to 0. -define void @kernel4b3(i1 %c) "kernel" { +define amdgpu_kernel void @kernel4b3(i1 %c) "kernel" { ; TUNIT-LABEL: define {{[^@]+}}@kernel4b3 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QB3, align 4 @@ -453,7 +453,7 @@ S: } -define void @kernel4c1(i1 %c) "kernel" { +define amdgpu_kernel void @kernel4c1(i1 %c) "kernel" { ; TUNIT: Function Attrs: norecurse ; TUNIT-LABEL: define {{[^@]+}}@kernel4c1 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { @@ -488,7 +488,7 @@ S: } ; We should not replace the load or delete the second store. -define void @kernel4d1(i1 %c) "kernel" { +define amdgpu_kernel void @kernel4d1(i1 %c) "kernel" { ; TUNIT: Function Attrs: norecurse ; TUNIT-LABEL: define {{[^@]+}}@kernel4d1 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { @@ -529,7 +529,7 @@ S: ret void } -define void @kernel4c2(i1 %c) "kernel" { +define amdgpu_kernel void @kernel4c2(i1 %c) "kernel" { ; TUNIT: Function Attrs: norecurse ; TUNIT-LABEL: define {{[^@]+}}@kernel4c2 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { @@ -563,7 +563,7 @@ S: } ; We should not replace the load with undef. -define void @kernel4d2(i1 %c) "kernel" { +define amdgpu_kernel void @kernel4d2(i1 %c) "kernel" { ; TUNIT: Function Attrs: norecurse ; TUNIT-LABEL: define {{[^@]+}}@kernel4d2 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { @@ -595,7 +595,7 @@ S: ret void } -define void @kernel4c3(i1 %c) "kernel" { +define amdgpu_kernel void @kernel4c3(i1 %c) "kernel" { ; TUNIT: Function Attrs: norecurse ; TUNIT-LABEL: define {{[^@]+}}@kernel4c3 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { @@ -629,7 +629,7 @@ S: } ; We should not replace the load with undef. -define void @kernel4d3(i1 %c) "kernel" { +define amdgpu_kernel void @kernel4d3(i1 %c) "kernel" { ; TUNIT: Function Attrs: norecurse ; TUNIT-LABEL: define {{[^@]+}}@kernel4d3 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { @@ -661,7 +661,7 @@ S: ret void } -define void @kernel_unknown_and_aligned1(i1 %c) "kernel" { +define amdgpu_kernel void @kernel_unknown_and_aligned1(i1 %c) "kernel" { ; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned1 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]] @@ -700,7 +700,7 @@ S: ret void } -define void @kernel_unknown_and_aligned2(i1 %c) "kernel" { +define amdgpu_kernel void @kernel_unknown_and_aligned2(i1 %c) "kernel" { ; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned2 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]] @@ -741,7 +741,7 @@ S: ret void } -define void @kernel_unknown_and_aligned3(i1 %c) "kernel" { +define amdgpu_kernel void @kernel_unknown_and_aligned3(i1 %c) "kernel" { ; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned3 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]] @@ -782,7 +782,7 @@ S: ret void } -define void @kernel_unknown_and_not_aligned1(i1 %c) "kernel" { +define amdgpu_kernel void @kernel_unknown_and_not_aligned1(i1 %c) "kernel" { ; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_not_aligned1 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]] @@ -828,29 +828,9 @@ declare void @__kmpc_target_deinit() nocallback declare void @llvm.assume(i1) !llvm.module.flags = !{!0, !1} -!nvvm.annotations = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20} !0 = !{i32 7, !"openmp", i32 50} !1 = !{i32 7, !"openmp-device", i32 50} -!2 = !{ptr @kernel, !"kernel", i32 1} -!3 = !{ptr @kernel2, !"kernel", i32 1} -!4 = !{ptr @kernel3, !"kernel", i32 1} -!5 = !{ptr @kernel4a1, !"kernel", i32 1} -!6 = !{ptr @kernel4b1, !"kernel", i32 1} -!7 = !{ptr @kernel4a2, !"kernel", i32 1} -!8 = !{ptr @kernel4b2, !"kernel", i32 1} -!9 = !{ptr @kernel4a3, !"kernel", i32 1} -!10 = !{ptr @kernel4b3, !"kernel", i32 1} -!11 = !{ptr @kernel4c1, !"kernel", i32 1} -!12 = !{ptr @kernel4d1, !"kernel", i32 1} -!13 = !{ptr @kernel4c2, !"kernel", i32 1} -!14 = !{ptr @kernel4d2, !"kernel", i32 1} -!15 = !{ptr @kernel4c3, !"kernel", i32 1} -!16 = !{ptr @kernel4d3, !"kernel", i32 1} -!17 = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1} -!18 = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1} -!19 = !{ptr @kernel_unknown_and_aligned3, !"kernel", i32 1} -!20 = !{ptr @kernel_unknown_and_not_aligned1, !"kernel", i32 1} ;. ; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" } @@ -872,45 +852,7 @@ declare void @llvm.assume(i1) ;. ; TUNIT: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; TUNIT: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; TUNIT: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1} -; TUNIT: [[META3:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1} -; TUNIT: [[META4:![0-9]+]] = !{ptr @kernel3, !"kernel", i32 1} -; TUNIT: [[META5:![0-9]+]] = !{ptr @kernel4a1, !"kernel", i32 1} -; TUNIT: [[META6:![0-9]+]] = !{ptr @kernel4b1, !"kernel", i32 1} -; TUNIT: [[META7:![0-9]+]] = !{ptr @kernel4a2, !"kernel", i32 1} -; TUNIT: [[META8:![0-9]+]] = !{ptr @kernel4b2, !"kernel", i32 1} -; TUNIT: [[META9:![0-9]+]] = !{ptr @kernel4a3, !"kernel", i32 1} -; TUNIT: [[META10:![0-9]+]] = !{ptr @kernel4b3, !"kernel", i32 1} -; TUNIT: [[META11:![0-9]+]] = !{ptr @kernel4c1, !"kernel", i32 1} -; TUNIT: [[META12:![0-9]+]] = !{ptr @kernel4d1, !"kernel", i32 1} -; TUNIT: [[META13:![0-9]+]] = !{ptr @kernel4c2, !"kernel", i32 1} -; TUNIT: [[META14:![0-9]+]] = !{ptr @kernel4d2, !"kernel", i32 1} -; TUNIT: [[META15:![0-9]+]] = !{ptr @kernel4c3, !"kernel", i32 1} -; TUNIT: [[META16:![0-9]+]] = !{ptr @kernel4d3, !"kernel", i32 1} -; TUNIT: [[META17:![0-9]+]] = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1} -; TUNIT: [[META18:![0-9]+]] = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1} -; TUNIT: [[META19:![0-9]+]] = !{ptr @kernel_unknown_and_aligned3, !"kernel", i32 1} -; TUNIT: [[META20:![0-9]+]] = !{ptr @kernel_unknown_and_not_aligned1, !"kernel", i32 1} ;. ; CGSCC: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CGSCC: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CGSCC: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1} -; CGSCC: [[META3:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1} -; CGSCC: [[META4:![0-9]+]] = !{ptr @kernel3, !"kernel", i32 1} -; CGSCC: [[META5:![0-9]+]] = !{ptr @kernel4a1, !"kernel", i32 1} -; CGSCC: [[META6:![0-9]+]] = !{ptr @kernel4b1, !"kernel", i32 1} -; CGSCC: [[META7:![0-9]+]] = !{ptr @kernel4a2, !"kernel", i32 1} -; CGSCC: [[META8:![0-9]+]] = !{ptr @kernel4b2, !"kernel", i32 1} -; CGSCC: [[META9:![0-9]+]] = !{ptr @kernel4a3, !"kernel", i32 1} -; CGSCC: [[META10:![0-9]+]] = !{ptr @kernel4b3, !"kernel", i32 1} -; CGSCC: [[META11:![0-9]+]] = !{ptr @kernel4c1, !"kernel", i32 1} -; CGSCC: [[META12:![0-9]+]] = !{ptr @kernel4d1, !"kernel", i32 1} -; CGSCC: [[META13:![0-9]+]] = !{ptr @kernel4c2, !"kernel", i32 1} -; CGSCC: [[META14:![0-9]+]] = !{ptr @kernel4d2, !"kernel", i32 1} -; CGSCC: [[META15:![0-9]+]] = !{ptr @kernel4c3, !"kernel", i32 1} -; CGSCC: [[META16:![0-9]+]] = !{ptr @kernel4d3, !"kernel", i32 1} -; CGSCC: [[META17:![0-9]+]] = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1} -; CGSCC: [[META18:![0-9]+]] = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1} -; CGSCC: [[META19:![0-9]+]] = !{ptr @kernel_unknown_and_aligned3, !"kernel", i32 1} -; CGSCC: [[META20:![0-9]+]] = !{ptr @kernel_unknown_and_not_aligned1, !"kernel", i32 1} ;. diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 42616155f0cc32..64fef02c8f3f8c 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -7622,26 +7622,6 @@ TEST_F(OpenMPIRBuilderTest, createGPUOffloadEntry) { /* Size = */ 0, /* Flags = */ 0, GlobalValue::WeakAnyLinkage); - // Check nvvm.annotations only created for GPU kernels - NamedMDNode *MD = M->getNamedMetadata("nvvm.annotations"); - EXPECT_NE(MD, nullptr); - EXPECT_EQ(MD->getNumOperands(), 1u); - - MDNode *Annotations = MD->getOperand(0); - EXPECT_EQ(Annotations->getNumOperands(), 3u); - - Constant *ConstVal = - dyn_cast(Annotations->getOperand(0))->getValue(); - EXPECT_TRUE(isa(Fn)); - EXPECT_EQ(ConstVal, cast(Fn)); - - EXPECT_TRUE(Annotations->getOperand(1).equalsStr("kernel")); - - EXPECT_TRUE(mdconst::hasa(Annotations->getOperand(2))); - APInt IntVal = - mdconst::extract(Annotations->getOperand(2))->getValue(); - EXPECT_EQ(IntVal, 1); - // Check kernel attributes EXPECT_TRUE(Fn->hasFnAttribute("kernel")); EXPECT_TRUE(Fn->hasFnAttribute(Attribute::MustProgress));