Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCLomatic] Enable the migration of API cudaLaunchHostFunc #2667

Open
wants to merge 6 commits into
base: SYCLomatic
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions clang/lib/DPCT/RuleInfra/MapNames.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,9 @@ void MapNames::setExplicitNamespaceMap(
{"cudaStream_t",
std::make_shared<TypeNameRule>(getDpctNamespace() + "queue_ptr",
HelperFeatureEnum::device_ext)},
{"cudaHostFn_t",
std::make_shared<TypeNameRule>(getDpctNamespace() + "host_callback",
HelperFeatureEnum::device_ext)},
{"CUstream",
std::make_shared<TypeNameRule>(getDpctNamespace() + "queue_ptr",
HelperFeatureEnum::device_ext)},
Expand Down
110 changes: 72 additions & 38 deletions clang/lib/DPCT/RulesLang/RulesLang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ void TypeInDeclRule::registerMatcher(MatchFinder &MF) {
"cublasLtMatmulHeuristicResult_t", "CUjit_target",
"cublasLtMatrixTransformDesc_t", "cudaGraphicsMapFlags",
"cudaGraphicsRegisterFlags", "cudaExternalMemoryHandleType",
"CUstreamCallback"))))))
"CUstreamCallback", "cudaHostFn_t"))))))
.bind("cudaTypeDef"),
this);

Expand Down Expand Up @@ -4622,7 +4622,8 @@ void KernelCallRule::registerMatcher(ast_matchers::MatchFinder &MF) {
this);

auto launchAPIName = [&]() {
return hasAnyName("cudaLaunchKernel", "cudaLaunchCooperativeKernel");
return hasAnyName("cudaLaunchKernel", "cudaLaunchCooperativeKernel",
"cudaLaunchHostFunc");
};
MF.addMatcher(
callExpr(allOf(callee(functionDecl(launchAPIName())), parentStmt()))
Expand Down Expand Up @@ -4837,56 +4838,89 @@ void KernelCallRule::runRule(
LaunchKernelCall = getNodeAsType<CallExpr>(Result, "launchUsed");
IsAssigned = true;
}
if (!LaunchKernelCall)
auto FD = LaunchKernelCall->getDirectCallee();
if (!LaunchKernelCall || !FD)
return;
const Expr *CalleeDRE = LaunchKernelCall->getArg(0);
bool IsFuncTypeErased = true;
auto QT = CalleeDRE->getType();

if (QT->isPointerType()) {
QT = QT->getPointeeType();
}
if (QT->isFunctionType()) {
IsFuncTypeErased = false;
}

if (!getAddressedRef(CalleeDRE)) {
if (IsFuncTypeErased) {
DpctGlobalInfo::setCVersionCUDALaunchUsed();
std::string FuncName = FD->getNameAsString();
std::cout << FuncName << std::endl;
if (FuncName == "cudaLaunchHostFunc") {
if (DpctGlobalInfo::getUsmLevel() != UsmLevel::UL_Restricted) {
report(LaunchKernelCall->getBeginLoc(), Diagnostics::API_NOT_MIGRATED,
false, "cudaLaunchHostFunc");
return;
}
std::string ReplStr;
llvm::raw_string_ostream OS(ReplStr);
std::string IndentStr = getIndent(LaunchKernelCall->getBeginLoc(),
DpctGlobalInfo::getSourceManager())
.str();
if (IsAssigned) {
OS << MapNames::getCheckErrorMacroName() << "(";
}
OS << MapNames::getDpctNamespace() << "kernel_launcher::launch(";
size_t ArgsNum = LaunchKernelCall->getNumArgs();
for (size_t i = 0; i < ArgsNum; i++) {
if (auto Arg = LaunchKernelCall->getArg(i)) {
if (i == 0) {
if (auto E = getAddressedRef(CalleeDRE, false, nullptr)) {
OS << ExprAnalysis::ref(E);
OS << ExprAnalysis::ref(LaunchKernelCall->getArg(0))
<< "->submit([&](sycl::handler &cgh) {" << getNL() << IndentStr
<< " cgh.host_task([=](){" << getNL() << IndentStr << " "
<< ExprAnalysis::ref(LaunchKernelCall->getArg(1)) << "("
<< ExprAnalysis::ref(LaunchKernelCall->getArg(2)) << ");" << getNL()
<< IndentStr << " });" << getNL() << IndentStr << "})";
if (IsAssigned) {
OS << ")";
}
auto Repl = new ReplaceStmt(LaunchKernelCall, OS.str());
Repl->setBlockLevelFormatFlag();
emplaceTransformation(Repl);
return;
} else {
const Expr *CalleeDRE = LaunchKernelCall->getArg(0);
bool IsFuncTypeErased = true;
auto QT = CalleeDRE->getType();

if (QT->isPointerType()) {
QT = QT->getPointeeType();
}
if (QT->isFunctionType()) {
IsFuncTypeErased = false;
}

if (!getAddressedRef(CalleeDRE)) {
if (IsFuncTypeErased) {
DpctGlobalInfo::setCVersionCUDALaunchUsed();
}
std::string ReplStr;
llvm::raw_string_ostream OS(ReplStr);
if (IsAssigned) {
OS << MapNames::getCheckErrorMacroName() << "(";
}
OS << MapNames::getDpctNamespace() << "kernel_launcher::launch(";
size_t ArgsNum = LaunchKernelCall->getNumArgs();
for (size_t i = 0; i < ArgsNum; i++) {
if (auto Arg = LaunchKernelCall->getArg(i)) {
if (i == 0) {
if (auto E = getAddressedRef(CalleeDRE, false, nullptr)) {
OS << ExprAnalysis::ref(E);
} else {
OS << ExprAnalysis::ref(Arg);
}
} else {
OS << ExprAnalysis::ref(Arg);
OS << ", " << ExprAnalysis::ref(Arg);
}
} else {
OS << ", " << ExprAnalysis::ref(Arg);
}
}
}
OS << ")";
if (IsAssigned) {
OS << ")";
if (IsAssigned) {
OS << ")";
}
emplaceTransformation(new ReplaceStmt(LaunchKernelCall, OS.str()));
return;
}
emplaceTransformation(new ReplaceStmt(LaunchKernelCall, OS.str()));
return;
}

if (!IsAssigned)
findAndRemoveTrailingSemicolon(LaunchKernelCall, Result);
if (DpctGlobalInfo::getInstance().buildLaunchKernelInfo(LaunchKernelCall,
IsAssigned)) {
emplaceTransformation(new ReplaceStmt(LaunchKernelCall, true, false, ""));
if (!IsAssigned)
findAndRemoveTrailingSemicolon(LaunchKernelCall, Result);
if (DpctGlobalInfo::getInstance().buildLaunchKernelInfo(LaunchKernelCall,
IsAssigned)) {
emplaceTransformation(
new ReplaceStmt(LaunchKernelCall, true, false, ""));
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/DPCT/SrcAPI/APINames.inc
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ ENTRY(cudaGridDependencySynchronize, cudaGridDependencySynchronize, false, NO_FL
ENTRY(cudaLaunchCooperativeKernel, cudaLaunchCooperativeKernel, true, NO_FLAG, P4, "Partial: DPCT1007")
ENTRY(cudaLaunchCooperativeKernelMultiDevice, cudaLaunchCooperativeKernelMultiDevice, false, NO_FLAG, P4, "comment")
ENTRY(cudaLaunchDevice, cudaLaunchDevice, false, NO_FLAG, P4, "comment")
ENTRY(cudaLaunchHostFunc, cudaLaunchHostFunc, false, NO_FLAG, P4, "comment")
ENTRY(cudaLaunchHostFunc, cudaLaunchHostFunc, true, NO_FLAG, P4, "comment")
ENTRY(cudaLaunchKernel, cudaLaunchKernel, true, NO_FLAG, P0, "Partial: DPCT1007, success only when directly using of kernel function name")
ENTRY(cudaLaunchKernelExC, cudaLaunchKernelExC, false, NO_FLAG, P4, "comment")
ENTRY(cudaSetDoubleForDevice, cudaSetDoubleForDevice, false, NO_FLAG, P0, "comment")
Expand Down
2 changes: 2 additions & 0 deletions clang/runtime/dpct-rt/include/dpct/kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ namespace dpct {
// Function-pointer type for a type-erased kernel launch wrapper: invoked with
// the target queue, the launch nd_range, an unsigned int (presumably the
// dynamic local-memory byte count -- confirm against kernel_launcher), and two
// void** argument arrays whose exact layout is defined by the launcher side.
typedef void (*kernel_functor)(sycl::queue &, const sycl::nd_range<3> &,
unsigned int, void **, void **);

typedef void (*host_callback)(void *);

struct kernel_function_info {
int max_work_group_size = 0;
int shared_size_bytes = 0;
Expand Down
28 changes: 28 additions & 0 deletions clang/test/dpct/launch-kernel-usm.cu
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2
// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2
// RUN: dpct --format-range=none -out-root %T/launch-kernel-usm %s --cuda-include-path="%cuda-path/include" -- -x cuda --cuda-host-only -std=c++14
// RUN: FileCheck %s --match-full-lines --input-file %T/launch-kernel-usm/launch-kernel-usm.dp.cpp

#include<cuda_runtime.h>
#include<iostream>

// CHECK: void template_device(T *d, T *s) {
template<class T>
__device__ void template_device(T *d) {
Expand All @@ -22,6 +27,11 @@ __global__ void kernel(int *d, cudaTextureObject_t tex) {
tex1D(d + gtid, tex, gtid);
}

// Host-side callback used to exercise cudaLaunchHostFunc migration; prints the
// message carried through the opaque user-data pointer to standard output.
void hostCallback(void *userData) {
  const char *text = static_cast<const char *>(userData);
  std::cout << "Host callback executed. Message: " << text << std::endl;
}

int main() {
int *d_data;
cudaMalloc(&d_data, sizeof(int));
Expand Down Expand Up @@ -87,6 +97,24 @@ int main() {
// CHECK: dpct::kernel_launcher::launch(kernel_array[10], dpct::dim3(16), dpct::dim3(16), args, 0, 0);
cudaLaunchKernel(kernel_array[10], dim3(16), dim3(16), args, 0, 0);

cudaError_t err;
const char *message = "Kernel execution finished.";
cudaStream_t stream;
// CHECK: err = DPCT_CHECK_ERROR(stream->submit([&](sycl::handler &cgh) {
// CHECK: cgh.host_task([=](){
// CHECK: hostCallback((void*)message);
// CHECK: });
// CHECK: }));
err = cudaLaunchHostFunc(stream, hostCallback, (void*)message);

// CHECK: dpct::host_callback fn = hostCallback;
cudaHostFn_t fn = hostCallback;
// CHECK: stream->submit([&](sycl::handler &cgh) {
// CHECK: cgh.host_task([=](){
// CHECK: fn((void*)message);
// CHECK: });
cudaLaunchHostFunc(stream, fn, (void*)message);

cudaStreamDestroy(stream);
cudaDestroyTextureObject(tex);
cudaFree(d_data21);
Expand Down