Skip to content

Commit

Permalink
[OpenMP] Replace nvvm.annotation usage with kernel calling conventions (
Browse files Browse the repository at this point in the history
llvm#122320)

Specifying a kernel with the `ptx_kernel` or `amdgpu_kernel` calling
convention is more idiomatic and more compile-time performant than using
the `nvvm.annotations !"kernel"` metadata.

Transition OMPIRBuilder to use calling conventions for PTX kernels and
no longer emit `nvvm.annotations`. Update OpenMPOpt to work with kernels
specified via calling convention as well as metadata. Update OpenMP
tests to use the calling conventions.
  • Loading branch information
AlexMaclean authored Jan 25, 2025
1 parent d92bac8 commit 07ed818
Show file tree
Hide file tree
Showing 34 changed files with 606 additions and 2,105 deletions.
4 changes: 2 additions & 2 deletions clang/test/OpenMP/assumes_include_nvptx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@

// TODO: Think about teaching the OMPIRBuilder about default attributes as well so the __kmpc* declarations are annotated.

// CHECK: define weak_odr protected void @__omp_offloading_{{.*}}__Z17complex_reductionIfEvv_{{.*}}({{.*}}) [[attr0:#[0-9]]]
// CHECK: define weak_odr protected ptx_kernel void @__omp_offloading_{{.*}}__Z17complex_reductionIfEvv_{{.*}}({{.*}}) [[attr0:#[0-9]]]
// CHECK: call i32 @__kmpc_target_init(
// CHECK: declare noundef float @_Z3sinf(float noundef) [[attr1:#[0-9]*]]
// CHECK: declare void @__kmpc_target_deinit(
// CHECK: define weak_odr protected void @__omp_offloading_{{.*}}__Z17complex_reductionIdEvv_{{.*}}({{.*}}) [[attr0]]
// CHECK: define weak_odr protected ptx_kernel void @__omp_offloading_{{.*}}__Z17complex_reductionIdEvv_{{.*}}({{.*}}) [[attr0]]
// CHECK: %call = call noundef double @_Z3sind(double noundef 0.000000e+00) [[attr2:#[0-9]]]
// CHECK: declare noundef double @_Z3sind(double noundef) [[attr1]]

Expand Down
2 changes: 1 addition & 1 deletion clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ int foo(int n, double *ptr) {
ptr[0]++;
}

// TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, ptr noundef [[PTR_IN:%.+]])
// TCHECK: define weak_odr protected ptx_kernel void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, ptr noundef [[PTR_IN:%.+]])
// TCHECK: [[DYN_PTR_ADDR:%.+]] = alloca ptr,
// TCHECK: [[PTR_ADDR:%.+]] = alloca ptr,
// TCHECK-NOT: alloca ptr,
Expand Down
16 changes: 3 additions & 13 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6468,6 +6468,8 @@ void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
OutlinedFn->setVisibility(GlobalValue::ProtectedVisibility);
if (T.isAMDGCN())
OutlinedFn->setCallingConv(CallingConv::AMDGPU_KERNEL);
else if (T.isNVPTX())
OutlinedFn->setCallingConv(CallingConv::PTX_Kernel);
}
}

Expand Down Expand Up @@ -9223,20 +9225,8 @@ void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr,
if (!Fn)
return;

Module &M = *(Fn->getParent());
LLVMContext &Ctx = M.getContext();

// Get "nvvm.annotations" metadata node.
NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");

Metadata *MDVals[] = {
ConstantAsMetadata::get(Fn), MDString::get(Ctx, "kernel"),
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Ctx), 1))};
// Append metadata to nvvm.annotations.
MD->addOperand(MDNode::get(Ctx, MDVals));

// Add a function attribute for the kernel.
Fn->addFnAttr(Attribute::get(Ctx, "kernel"));
Fn->addFnAttr("kernel");
if (T.isAMDGCN())
Fn->addFnAttr("uniform-work-group-size", "true");
Fn->addFnAttr(Attribute::MustProgress);
Expand Down
61 changes: 40 additions & 21 deletions llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include "llvm/Transforms/IPO/OpenMPOpt.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
Expand All @@ -36,6 +37,7 @@
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
Expand Down Expand Up @@ -5903,34 +5905,51 @@ bool llvm::omp::isOpenMPKernel(Function &Fn) {
return Fn.hasFnAttribute("kernel");
}

/// Return true if \p F uses one of the kernel calling conventions checked
/// here: PTX_Kernel, AMDGPU_KERNEL, or SPIR_KERNEL. Every other calling
/// convention yields false.
static bool isKernelCC(Function &F) {
  const auto CC = F.getCallingConv();
  return CC == CallingConv::PTX_Kernel ||
         CC == CallingConv::AMDGPU_KERNEL ||
         CC == CallingConv::SPIR_KERNEL;
}

KernelSet llvm::omp::getDeviceKernels(Module &M) {
// TODO: Create a more cross-platform way of determining device kernels.
NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations");
KernelSet Kernels;

if (!MD)
return Kernels;
DenseSet<const Function *> SeenKernels;
auto ProcessKernel = [&](Function &KF) {
if (SeenKernels.insert(&KF).second) {
// We are only interested in OpenMP target regions. Others, such as
// kernels generated by CUDA but linked together, are not interesting to
// this pass.
if (isOpenMPKernel(KF)) {
++NumOpenMPTargetRegionKernels;
Kernels.insert(&KF);
} else
++NumNonOpenMPTargetRegionKernels;
}
};

for (auto *Op : MD->operands()) {
if (Op->getNumOperands() < 2)
continue;
MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
if (!KindID || KindID->getString() != "kernel")
continue;
if (NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations"))
for (auto *Op : MD->operands()) {
if (Op->getNumOperands() < 2)
continue;
MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
if (!KindID || KindID->getString() != "kernel")
continue;

Function *KernelFn =
mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
if (!KernelFn)
continue;
if (auto *KernelFn =
mdconst::dyn_extract_or_null<Function>(Op->getOperand(0)))
ProcessKernel(*KernelFn);
}

// We are only interested in OpenMP target regions. Others, such as kernels
// generated by CUDA but linked together, are not interesting to this pass.
if (isOpenMPKernel(*KernelFn)) {
++NumOpenMPTargetRegionKernels;
Kernels.insert(KernelFn);
} else
++NumNonOpenMPTargetRegionKernels;
}
for (Function &F : M)
if (isKernelCC(F))
ProcessKernel(F);

return Kernels;
}
Expand Down
17 changes: 7 additions & 10 deletions llvm/test/Transforms/OpenMP/always_inline_device.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
; CHECK: @G = external global i8
; CHECK: @kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
;.
define weak void @__omp_offloading_fd02_c0934fc2_foo_l4(ptr %dyn) #0 {
define weak ptx_kernel void @__omp_offloading_fd02_c0934fc2_foo_l4(ptr %dyn) #0 {
; CHECK: Function Attrs: norecurse nounwind
; CHECK-LABEL: @__omp_offloading_fd02_c0934fc2_foo_l4(
; CHECK-NEXT: entry:
Expand Down Expand Up @@ -79,12 +79,10 @@ attributes #1 = { convergent nounwind "frame-pointer"="all" "min-legal-vector-wi
attributes #2 = { convergent }

!omp_offload.info = !{!0}
!nvvm.annotations = !{!1}
!llvm.module.flags = !{!2, !3, !4, !5, !6}
!llvm.ident = !{!7}

!0 = !{i32 0, i32 64770, i32 -1064087614, !"foo", i32 4, i32 0}
!1 = !{ptr @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1}
!2 = !{i32 1, !"wchar_size", i32 4}
!3 = !{i32 7, !"openmp", i32 50}
!4 = !{i32 7, !"openmp-device", i32 50}
Expand All @@ -97,11 +95,10 @@ attributes #2 = { convergent }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 -1064087614, !"foo", i32 4, i32 0}
; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1}
; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
; CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; CHECK: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CHECK: [[META4:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; CHECK: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; CHECK: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
;.
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@ define linkonce_odr hidden i8 @_ZStplIdESt7complexIT_ERKS2_S4_() local_unnamed_a
ret i8 undef
}

declare void @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148(i64, i64, i64, ptr, ptr, i64, ptr, ptr, ptr, i64) local_unnamed_addr
declare ptx_kernel void @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148(i64, i64, i64, ptr, ptr, i64, ptr, ptr, ptr, i64) local_unnamed_addr

declare dso_local fastcc void @__kmpc_for_static_init_8u() unnamed_addr

!nvvm.annotations = !{!0}

!0 = !{ptr @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148, !"kernel", i32 1}
Loading

0 comments on commit 07ed818

Please sign in to comment.