From 5d19fc9bca2f2ca740f10f8983ee76ad73ca38b4 Mon Sep 17 00:00:00 2001 From: Xiang Li Date: Thu, 1 Sep 2022 23:29:39 -0700 Subject: [PATCH 1/2] [dx12] Drop code for dxil generation. Run LLVM codegen for the DirectX backend. No real DXIL is output yet: a pass still needs to be added to transform the LLVM module into a shape the DirectX backend can consume. --- taichi/codegen/dx12/CMakeLists.txt | 1 + taichi/codegen/dx12/codegen_dx12.cpp | 21 ++- .../dx12/dx12_global_optimize_module.cpp | 153 ++++++++++++++++++ taichi/codegen/dx12/dx12_llvm_passes.h | 30 ++++ 4 files changed, 203 insertions(+), 2 deletions(-) create mode 100644 taichi/codegen/dx12/dx12_global_optimize_module.cpp create mode 100644 taichi/codegen/dx12/dx12_llvm_passes.h diff --git a/taichi/codegen/dx12/CMakeLists.txt b/taichi/codegen/dx12/CMakeLists.txt index 8a1b70aaa6ea4..24dca8d27238b 100644 --- a/taichi/codegen/dx12/CMakeLists.txt +++ b/taichi/codegen/dx12/CMakeLists.txt @@ -4,6 +4,7 @@ add_library(dx12_codegen) target_sources(dx12_codegen PRIVATE codegen_dx12.cpp + dx12_global_optimize_module.cpp ) target_include_directories(dx12_codegen diff --git a/taichi/codegen/dx12/codegen_dx12.cpp b/taichi/codegen/dx12/codegen_dx12.cpp index 29b446a4cbe4a..3789b18454420 100644 --- a/taichi/codegen/dx12/codegen_dx12.cpp +++ b/taichi/codegen/dx12/codegen_dx12.cpp @@ -1,5 +1,5 @@ #include "taichi/codegen/dx12/codegen_dx12.h" - +#include "taichi/codegen/dx12/dx12_llvm_passes.h" #include "taichi/rhi/dx12/dx12_api.h" #include "taichi/runtime/program_impls/llvm/llvm_program.h" #include "taichi/common/core.h" @@ -228,7 +228,24 @@ class TaskCodeGenLLVMDX12 : public TaskCodeGenLLVM { static std::vector generate_dxil_from_llvm( LLVMCompiledData &compiled_data, - taichi::lang::Kernel *kernel){TI_NOT_IMPLEMENTED} + taichi::lang::Kernel *kernel){ + // generate dxil from llvm ir. 
+ auto offloaded_local = compiled_data.tasks; + auto module = compiled_data.module.get(); + for (auto &task : offloaded_local) { + llvm::Function *func = module->getFunction(task.name); + TI_ASSERT(func); + directx12::mark_function_as_cs_entry(func); + directx12::set_num_threads( + func, kernel->program->config.default_gpu_block_dim, 1, 1); + // FIXME: save task.block_dim like + // tlctx->mark_function_as_cuda_kernel(func, task.block_dim); + } + auto dx_container = + directx12::global_optimize_module(module, kernel->program->config); + // validate and sign dx container. + return directx12::validate_and_sign(dx_container); +} KernelCodeGenDX12::CompileResult KernelCodeGenDX12::compile() { TI_AUTO_PROF; diff --git a/taichi/codegen/dx12/dx12_global_optimize_module.cpp b/taichi/codegen/dx12/dx12_global_optimize_module.cpp new file mode 100644 index 0000000000000..1b68b93b87024 --- /dev/null +++ b/taichi/codegen/dx12/dx12_global_optimize_module.cpp @@ -0,0 +1,153 @@ + +#include "taichi/common/core.h" +#include "taichi/util/io.h" +#include "taichi/program/program.h" +#include "taichi/ir/ir.h" +#include "taichi/ir/statements.h" +#include "taichi/util/statistics.h" +#include "taichi/util/file_sequence_writer.h" +#include "taichi/runtime/llvm/llvm_context.h" + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Function.h" + +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/Support/Host.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/IR/Verifier.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/InstCombine/InstCombine.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include 
"llvm/Transforms/IPO.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/GlobalVariable.h" + +using namespace llvm; + +namespace taichi { +namespace lang { +namespace directx12 { + +const llvm::StringRef ShaderAttrKindStr = "hlsl.shader"; + +void mark_function_as_cs_entry(::llvm::Function *F) { + F->addFnAttr(ShaderAttrKindStr, "compute"); +} +bool is_cs_entry(::llvm::Function *F) { + return F->hasFnAttribute(ShaderAttrKindStr); +} + +void set_num_threads(llvm::Function *F, unsigned x, unsigned y, unsigned z) { + const llvm::StringRef NumThreadsAttrKindStr = "hlsl.numthreads"; + std::string Str = llvm::formatv("{0},{1},{2}", x, y, z); + F->addFnAttr(NumThreadsAttrKindStr, Str); +} + +std::vector global_optimize_module(llvm::Module *module, + CompileConfig &config) { + TI_AUTO_PROF + if (llvm::verifyModule(*module, &llvm::errs())) { + module->print(llvm::errs(), nullptr); + TI_ERROR("Module broken"); + } + + for (llvm::Function &F : module->functions()) { + if (directx12::is_cs_entry(&F)) + continue; + F.addFnAttr(llvm::Attribute::AlwaysInline); + } + // FIXME: choose shader model based on feature used. 
+ llvm::StringRef triple = "dxil-pc-shadermodel6.3-compute"; + module->setTargetTriple(triple); + module->setSourceFileName(""); + std::string err_str; + const llvm::Target *target = + TargetRegistry::lookupTarget(triple.str(), err_str); + TI_ERROR_UNLESS(target, err_str); + + TargetOptions options; + if (config.fast_math) { + options.AllowFPOpFusion = FPOpFusion::Fast; + options.UnsafeFPMath = 1; + options.NoInfsFPMath = 1; + options.NoNaNsFPMath = 1; + } else { + options.AllowFPOpFusion = FPOpFusion::Strict; + options.UnsafeFPMath = 0; + options.NoInfsFPMath = 0; + options.NoNaNsFPMath = 0; + } + options.HonorSignDependentRoundingFPMathOption = false; + options.NoZerosInBSS = false; + options.GuaranteedTailCallOpt = false; + + legacy::FunctionPassManager function_pass_manager(module); + legacy::PassManager module_pass_manager; + + llvm::StringRef mcpu = ""; + std::unique_ptr target_machine(target->createTargetMachine( + triple.str(), mcpu.str(), "", options, llvm::Reloc::PIC_, + llvm::CodeModel::Small, + config.opt_level > 0 ? CodeGenOpt::Aggressive : CodeGenOpt::None)); + + TI_ERROR_UNLESS(target_machine.get(), "Could not allocate target machine!"); + + module->setDataLayout(target_machine->createDataLayout()); + + + module_pass_manager.add(createTargetTransformInfoWrapperPass( + target_machine->getTargetIRAnalysis())); + function_pass_manager.add(createTargetTransformInfoWrapperPass( + target_machine->getTargetIRAnalysis())); + + PassManagerBuilder b; + b.OptLevel = 3; + b.Inliner = createFunctionInliningPass(b.OptLevel, 0, false); + b.LoopVectorize = true; + b.SLPVectorize = true; + + target_machine->adjustPassManager(b); + + b.populateFunctionPassManager(function_pass_manager); + b.populateModulePassManager(module_pass_manager); + llvm::SmallString<256> str; + llvm::raw_svector_ostream OS(str); + // Write DXIL container to OS. 
+ target_machine->addPassesToEmitFile(module_pass_manager, OS, nullptr, + CGFT_ObjectFile); + + { + TI_PROFILER("llvm_function_pass"); + function_pass_manager.doInitialization(); + for (llvm::Module::iterator i = module->begin(); i != module->end(); i++) + function_pass_manager.run(*i); + + function_pass_manager.doFinalization(); + } + + { + TI_PROFILER("llvm_module_pass"); + module_pass_manager.run(*module); + } + if (config.print_kernel_llvm_ir_optimized) { + static FileSequenceWriter writer( + "taichi_kernel_dx12_llvm_ir_optimized_{:04d}.ll", + "optimized LLVM IR (DX12)"); + writer.write(module); + } + return std::vector(str.begin(), str.end()); +} + +} // namespace directx12 +} // namespace lang +} // namespace taichi diff --git a/taichi/codegen/dx12/dx12_llvm_passes.h b/taichi/codegen/dx12/dx12_llvm_passes.h new file mode 100644 index 0000000000000..c68670365a191 --- /dev/null +++ b/taichi/codegen/dx12/dx12_llvm_passes.h @@ -0,0 +1,30 @@ + +#pragma once + +#include +#include + +namespace llvm { +class Function; +class Module; +} // namespace llvm + +namespace taichi { +namespace lang { +struct CompileConfig; + +namespace directx12 { + +void mark_function_as_cs_entry(llvm::Function *); +bool is_cs_entry(llvm::Function *); +void set_num_threads(llvm::Function *, unsigned x, unsigned y, unsigned z); + +std::vector global_optimize_module(llvm::Module *module, + CompileConfig &config); + +extern const char *NumWorkGroupsCBName; + +} // namespace directx12 +} // namespace lang +} // namespace taichi + From b5cd7bcc52a8d6bb6027503f9e638779559c831b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 2 Sep 2022 06:31:37 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- taichi/codegen/dx12/codegen_dx12.cpp | 2 +- taichi/codegen/dx12/dx12_global_optimize_module.cpp | 1 - taichi/codegen/dx12/dx12_llvm_passes.h | 1 - 3 files 
changed, 1 insertion(+), 3 deletions(-) diff --git a/taichi/codegen/dx12/codegen_dx12.cpp b/taichi/codegen/dx12/codegen_dx12.cpp index 3789b18454420..4be95a53f7b1b 100644 --- a/taichi/codegen/dx12/codegen_dx12.cpp +++ b/taichi/codegen/dx12/codegen_dx12.cpp @@ -228,7 +228,7 @@ class TaskCodeGenLLVMDX12 : public TaskCodeGenLLVM { static std::vector generate_dxil_from_llvm( LLVMCompiledData &compiled_data, - taichi::lang::Kernel *kernel){ + taichi::lang::Kernel *kernel) { // generate dxil from llvm ir. auto offloaded_local = compiled_data.tasks; auto module = compiled_data.module.get(); diff --git a/taichi/codegen/dx12/dx12_global_optimize_module.cpp b/taichi/codegen/dx12/dx12_global_optimize_module.cpp index 1b68b93b87024..94a9d7c003328 100644 --- a/taichi/codegen/dx12/dx12_global_optimize_module.cpp +++ b/taichi/codegen/dx12/dx12_global_optimize_module.cpp @@ -104,7 +104,6 @@ std::vector global_optimize_module(llvm::Module *module, module->setDataLayout(target_machine->createDataLayout()); - module_pass_manager.add(createTargetTransformInfoWrapperPass( target_machine->getTargetIRAnalysis())); function_pass_manager.add(createTargetTransformInfoWrapperPass( diff --git a/taichi/codegen/dx12/dx12_llvm_passes.h b/taichi/codegen/dx12/dx12_llvm_passes.h index c68670365a191..c07896abba1a3 100644 --- a/taichi/codegen/dx12/dx12_llvm_passes.h +++ b/taichi/codegen/dx12/dx12_llvm_passes.h @@ -27,4 +27,3 @@ extern const char *NumWorkGroupsCBName; } // namespace directx12 } // namespace lang } // namespace taichi -