Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[amdgpu] Part4 link bitcode file #7180

Merged
merged 12 commits into from
Jan 20, 2023
6 changes: 6 additions & 0 deletions cmake/TaichiCore.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -511,3 +511,9 @@ if (NOT APPLE)
install(FILES ${CMAKE_SOURCE_DIR}/external/cuda_libdevice/slim_libdevice.10.bc
DESTINATION ${INSTALL_LIB_DIR}/runtime)
endif()

# When Taichi is configured with AMDGPU support, install every bitcode file
# found under external/amdgpu_libdevice into ${INSTALL_LIB_DIR}/runtime.
# NOTE(review): presumably this is the directory the LLVM runtime resolves at
# run time when it loads the AMDGPU libdevice files -- confirm against the
# loader.
if (TI_WITH_AMDGPU)
# file(GLOB) is evaluated at configure time, so CMake has to be re-run after
# bitcode files are added to or removed from the directory.
file(GLOB AMDGPU_BC_FILES ${CMAKE_SOURCE_DIR}/external/amdgpu_libdevice/*.bc)
install(FILES ${AMDGPU_BC_FILES}
DESTINATION ${INSTALL_LIB_DIR}/runtime)
endif()
Binary file added external/amdgpu_libdevice/ockl.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_abi_version_400.bc
Binary file not shown.
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_daz_opt_off.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_finite_only_off.bc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_600.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_601.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_602.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_700.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_701.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_702.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_703.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_704.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_705.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_801.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_802.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_803.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_805.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_810.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_900.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_902.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_904.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_906.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_908.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_909.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_90a.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_90c.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_unsafe_math_off.bc
Binary file not shown.
Binary file not shown.
Binary file added external/amdgpu_libdevice/ocml.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/opencl.bc
Binary file not shown.
82 changes: 82 additions & 0 deletions taichi/runtime/llvm/llvm_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@
#include "taichi/rhi/cuda/cuda_context.h"
#endif

#if defined(TI_WITH_AMDGPU)
#include "taichi/rhi/amdgpu/amdgpu_context.h"
#endif

namespace taichi::lang {

using namespace llvm;
Expand Down Expand Up @@ -486,6 +490,32 @@ std::unique_ptr<llvm::Module> TaichiLLVMContext::module_from_file(
// runtime_module->print(llvm::errs(), nullptr);
}

#ifdef TI_WITH_AMDGPU
  // Replaces the body of the module function `name` ("block_dim" or
  // "grid_dim") with a single call to the matching `__ockl_*` function,
  // passing `lhs` as the only argument, truncating the result to i32 and
  // returning it. Any other `name` raises TI_ERROR. If either the function
  // `name` or the `__ockl_*` function is absent from the module, nothing is
  // changed. The rewritten function is finally passed to mark_inline().
  auto patch_amdgpu_kernel_dim = [&](std::string name, llvm::Value *lhs) {
    std::string ockl_name;
    if (name == "block_dim") {
      ockl_name = "__ockl_get_local_size";
    } else if (name == "grid_dim") {
      ockl_name = "__ockl_get_num_groups";
    } else {
      TI_ERROR("Unknown patch function name");
    }

    llvm::Function *stub = module->getFunction(name);
    llvm::Function *ockl_func = module->getFunction(ockl_name);
    if (!(stub && ockl_func)) {
      return;
    }

    // Discard the existing body and emit a fresh single-block body.
    stub->deleteBody();
    llvm::BasicBlock *entry = llvm::BasicBlock::Create(*ctx, "entry", stub);
    IRBuilder<> builder(entry);  // inserts at the end of `entry`

    llvm::FunctionType *ockl_type = ockl_func->getFunctionType();
    llvm::Type *i32_type = llvm::Type::getInt32Ty(*ctx);
    auto wide_dim = builder.CreateCall(ockl_type, ockl_func, {lhs});
    builder.CreateRet(builder.CreateTrunc(wide_dim, i32_type));

    TaichiLLVMContext::mark_inline(stub);
  };
#endif

if (arch_ == Arch::amdgpu) {
module->setTargetTriple("amdgcn-amd-amdhsa");
#ifdef TI_WITH_AMDGPU
Expand All @@ -498,6 +528,12 @@ std::unique_ptr<llvm::Module> TaichiLLVMContext::module_from_file(
function_pass_manager.doFinalization();
patch_intrinsic("thread_idx", llvm::Intrinsic::amdgcn_workitem_id_x);
patch_intrinsic("block_idx", llvm::Intrinsic::amdgcn_workgroup_id_x);

link_module_with_amdgpu_libdevice(module);
patch_amdgpu_kernel_dim(
"block_dim", llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0));
patch_amdgpu_kernel_dim(
"grid_dim", llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0));
#endif
}
}
Expand Down Expand Up @@ -537,6 +573,52 @@ void TaichiLLVMContext::link_module_with_cuda_libdevice(
}
}

/**
 * Links the AMDGPU libdevice bitcode files into `module`.
 *
 * Each file in the list below is loaded from `runtime_lib_dir()` with
 * module_from_bitcode_file() and linked into `module` via
 * llvm::Linker::linkModules(). The data layout of `module` is replaced with
 * the one from "ocml.bc". The ISA-specific file name is derived from the
 * string returned by AMDGPUContext::get_mcpu().
 *
 * Asserts that the context's arch is Arch::amdgpu. When TI_WITH_AMDGPU is not
 * defined the function does nothing beyond that assertion.
 *
 * @param module Module that receives the libdevice definitions.
 * Raises TI_ERROR if linking any of the bitcode files fails.
 */
void TaichiLLVMContext::link_module_with_amdgpu_libdevice(
    std::unique_ptr<llvm::Module> &module) {
  TI_ASSERT(arch_ == Arch::amdgpu);
#if defined(TI_WITH_AMDGPU)
  // The ISA suffix is whatever follows the first three characters of the
  // mcpu string (at most four characters). Guard the length so a malformed
  // value fails here instead of producing a bogus file name or throwing
  // std::out_of_range from substr().
  const std::string mcpu = AMDGPUContext::get_instance().get_mcpu();
  TI_ASSERT(mcpu.size() > 3);
  const std::string isa_version = mcpu.substr(3, 4);

  const std::string libdevice_files[] = {
      "ocml.bc",
      "oclc_wavefrontsize64_off.bc",
      "ockl.bc",
      "oclc_abi_version_400.bc",
      "oclc_correctly_rounded_sqrt_off.bc",
      "oclc_daz_opt_off.bc",
      "oclc_finite_only_off.bc",
      "oclc_isa_version_" + isa_version + ".bc",
      "oclc_unsafe_math_off.bc",
      "opencl.bc"};

  // Loop-invariant: every bitcode file lives in the same directory.
  const std::string lib_dir = runtime_lib_dir() + "/";

  for (const auto &libdevice : libdevice_files) {
    auto libdevice_module = module_from_bitcode_file(lib_dir + libdevice,
                                                     get_this_thread_context());

    // Adopt the data layout shipped with ocml.bc for the whole module.
    if (libdevice == "ocml.bc") {
      module->setDataLayout(libdevice_module->getDataLayout());
    }

    // NOTE(review): the prefix is built from the *file name*, so it is e.g.
    // "__ocml.bc" (including the ".bc" suffix). That looks like it can never
    // match a function name, which would make this loop a no-op -- confirm
    // the intent before changing it. Behavior is kept exactly as before.
    const std::string linkage_prefix = "__" + libdevice;
    for (auto &f : libdevice_module->functions()) {
      auto func_ = module->getFunction(f.getName());
      if (!func_ && starts_with(f.getName().lower(), linkage_prefix))
        f.setLinkage(llvm::Function::CommonLinkage);
    }

    // linkModules() returns true on error.
    if (llvm::Linker::linkModules(*module, std::move(libdevice_module))) {
      TI_ERROR("AMDGPU libdevice linking failure.");
    }
  }
#endif
}

void TaichiLLVMContext::add_struct_module(std::unique_ptr<Module> module,
int tree_id) {
TI_AUTO_PROF;
Expand Down
2 changes: 2 additions & 0 deletions taichi/runtime/llvm/llvm_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ class TaichiLLVMContext {

void link_module_with_cuda_libdevice(std::unique_ptr<llvm::Module> &module);

// Links the AMDGPU libdevice bitcode files into `module`. The definition
// asserts that this context's arch is Arch::amdgpu and raises TI_ERROR if
// linking fails.
void link_module_with_amdgpu_libdevice(std::unique_ptr<llvm::Module> &module);

static int num_instructions(llvm::Function *func);

void insert_nvvm_annotation(llvm::Function *func, std::string key, int val);
Expand Down
2 changes: 1 addition & 1 deletion tests/cpp/backends/amdgpu_device_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ TEST(AMDGPU, ConvertFuncParamAddressSpacePass) {
}
}

TEST(AMDGPU, ConvertProgramAndLaunch) {
TEST(AMDGPU, CompileProgramAndLaunch) {
std::string program =
"target datalayout = "
"\"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:"
Expand Down