Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dx12] Drop code for dxil generation. #5958

Merged
merged 2 commits into from
Sep 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions taichi/codegen/dx12/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ add_library(dx12_codegen)
target_sources(dx12_codegen
PRIVATE
codegen_dx12.cpp
dx12_global_optimize_module.cpp
)

target_include_directories(dx12_codegen
Expand Down
21 changes: 19 additions & 2 deletions taichi/codegen/dx12/codegen_dx12.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "taichi/codegen/dx12/codegen_dx12.h"

#include "taichi/codegen/dx12/dx12_llvm_passes.h"
#include "taichi/rhi/dx12/dx12_api.h"
#include "taichi/runtime/program_impls/llvm/llvm_program.h"
#include "taichi/common/core.h"
Expand Down Expand Up @@ -228,7 +228,24 @@ class TaskCodeGenLLVMDX12 : public TaskCodeGenLLVM {

static std::vector<uint8_t> generate_dxil_from_llvm(
LLVMCompiledData &compiled_data,
taichi::lang::Kernel *kernel){TI_NOT_IMPLEMENTED}
taichi::lang::Kernel *kernel) {
// generate dxil from llvm ir.
auto offloaded_local = compiled_data.tasks;
auto module = compiled_data.module.get();
for (auto &task : offloaded_local) {
llvm::Function *func = module->getFunction(task.name);
TI_ASSERT(func);
directx12::mark_function_as_cs_entry(func);
directx12::set_num_threads(
func, kernel->program->config.default_gpu_block_dim, 1, 1);
// FIXME: save task.block_dim like
// tlctx->mark_function_as_cuda_kernel(func, task.block_dim);
}
auto dx_container =
directx12::global_optimize_module(module, kernel->program->config);
// validate and sign dx container.
return directx12::validate_and_sign(dx_container);
}

KernelCodeGenDX12::CompileResult KernelCodeGenDX12::compile() {
TI_AUTO_PROF;
Expand Down
152 changes: 152 additions & 0 deletions taichi/codegen/dx12/dx12_global_optimize_module.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@

#include "taichi/common/core.h"
#include "taichi/util/io.h"
#include "taichi/program/program.h"
#include "taichi/ir/ir.h"
#include "taichi/ir/statements.h"
#include "taichi/util/statistics.h"
#include "taichi/util/file_sequence_writer.h"
#include "taichi/runtime/llvm/llvm_context.h"

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/Function.h"

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/Support/Host.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/GlobalVariable.h"

using namespace llvm;

namespace taichi {
namespace lang {
namespace directx12 {

const llvm::StringRef ShaderAttrKindStr = "hlsl.shader";

void mark_function_as_cs_entry(::llvm::Function *F) {
F->addFnAttr(ShaderAttrKindStr, "compute");
}
bool is_cs_entry(::llvm::Function *F) {
return F->hasFnAttribute(ShaderAttrKindStr);
}

void set_num_threads(llvm::Function *F, unsigned x, unsigned y, unsigned z) {
const llvm::StringRef NumThreadsAttrKindStr = "hlsl.numthreads";
std::string Str = llvm::formatv("{0},{1},{2}", x, y, z);
F->addFnAttr(NumThreadsAttrKindStr, Str);
}

std::vector<uint8_t> global_optimize_module(llvm::Module *module,
CompileConfig &config) {
TI_AUTO_PROF
if (llvm::verifyModule(*module, &llvm::errs())) {
module->print(llvm::errs(), nullptr);
TI_ERROR("Module broken");
}

for (llvm::Function &F : module->functions()) {
if (directx12::is_cs_entry(&F))
continue;
F.addFnAttr(llvm::Attribute::AlwaysInline);
}
// FIXME: choose shader model based on feature used.
llvm::StringRef triple = "dxil-pc-shadermodel6.3-compute";
module->setTargetTriple(triple);
module->setSourceFileName("");
std::string err_str;
const llvm::Target *target =
TargetRegistry::lookupTarget(triple.str(), err_str);
TI_ERROR_UNLESS(target, err_str);

TargetOptions options;
if (config.fast_math) {
options.AllowFPOpFusion = FPOpFusion::Fast;
options.UnsafeFPMath = 1;
options.NoInfsFPMath = 1;
options.NoNaNsFPMath = 1;
} else {
options.AllowFPOpFusion = FPOpFusion::Strict;
options.UnsafeFPMath = 0;
options.NoInfsFPMath = 0;
options.NoNaNsFPMath = 0;
}
options.HonorSignDependentRoundingFPMathOption = false;
options.NoZerosInBSS = false;
options.GuaranteedTailCallOpt = false;

legacy::FunctionPassManager function_pass_manager(module);
legacy::PassManager module_pass_manager;

llvm::StringRef mcpu = "";
std::unique_ptr<TargetMachine> target_machine(target->createTargetMachine(
triple.str(), mcpu.str(), "", options, llvm::Reloc::PIC_,
llvm::CodeModel::Small,
config.opt_level > 0 ? CodeGenOpt::Aggressive : CodeGenOpt::None));

TI_ERROR_UNLESS(target_machine.get(), "Could not allocate target machine!");

module->setDataLayout(target_machine->createDataLayout());

module_pass_manager.add(createTargetTransformInfoWrapperPass(
target_machine->getTargetIRAnalysis()));
function_pass_manager.add(createTargetTransformInfoWrapperPass(
target_machine->getTargetIRAnalysis()));

PassManagerBuilder b;
b.OptLevel = 3;
b.Inliner = createFunctionInliningPass(b.OptLevel, 0, false);
b.LoopVectorize = true;
b.SLPVectorize = true;

target_machine->adjustPassManager(b);

b.populateFunctionPassManager(function_pass_manager);
b.populateModulePassManager(module_pass_manager);
llvm::SmallString<256> str;
llvm::raw_svector_ostream OS(str);
// Write DXIL container to OS.
target_machine->addPassesToEmitFile(module_pass_manager, OS, nullptr,
CGFT_ObjectFile);

{
TI_PROFILER("llvm_function_pass");
function_pass_manager.doInitialization();
for (llvm::Module::iterator i = module->begin(); i != module->end(); i++)
function_pass_manager.run(*i);

function_pass_manager.doFinalization();
}

{
TI_PROFILER("llvm_module_pass");
module_pass_manager.run(*module);
}
if (config.print_kernel_llvm_ir_optimized) {
static FileSequenceWriter writer(
"taichi_kernel_dx12_llvm_ir_optimized_{:04d}.ll",
"optimized LLVM IR (DX12)");
writer.write(module);
}
return std::vector<uint8_t>(str.begin(), str.end());
}

} // namespace directx12
} // namespace lang
} // namespace taichi
29 changes: 29 additions & 0 deletions taichi/codegen/dx12/dx12_llvm_passes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@

#pragma once

#include <string>
#include <vector>

namespace llvm {
class Function;
class Module;
} // namespace llvm

namespace taichi {
namespace lang {
struct CompileConfig;

namespace directx12 {

void mark_function_as_cs_entry(llvm::Function *);
bool is_cs_entry(llvm::Function *);
void set_num_threads(llvm::Function *, unsigned x, unsigned y, unsigned z);

std::vector<uint8_t> global_optimize_module(llvm::Module *module,
CompileConfig &config);

extern const char *NumWorkGroupsCBName;

} // namespace directx12
} // namespace lang
} // namespace taichi