Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[opt] Support offline-cache for kernel with arch=cpu #4500

Merged
merged 13 commits into from
Mar 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,6 @@
[submodule "external/FP16"]
path = external/FP16
url = https://github.com/Maratyszcza/FP16
[submodule "external/PicoSHA2"]
path = external/PicoSHA2
url = https://github.com/okdshin/PicoSHA2.git
1 change: 1 addition & 0 deletions cmake/TaichiCore.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ endif()
include_directories(${CMAKE_SOURCE_DIR})
include_directories(external/include)
include_directories(external/spdlog/include)
include_directories(external/PicoSHA2)
if (TI_WITH_OPENGL)
target_include_directories(${CORE_LIBRARY_NAME} PRIVATE external/glad/include)
endif()
Expand Down
1 change: 1 addition & 0 deletions external/PicoSHA2
Submodule PicoSHA2 added at 167737
5 changes: 3 additions & 2 deletions taichi/backends/cpu/codegen_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ class CodeGenLLVMCPU : public CodeGenLLVM {
public:
using IRVisitor::visit;

CodeGenLLVMCPU(Kernel *kernel, IRNode *ir) : CodeGenLLVM(kernel, ir) {
CodeGenLLVMCPU(Kernel *kernel, IRNode *ir, bool needs_cache)
: CodeGenLLVM(kernel, ir, nullptr, needs_cache) {
TI_AUTO_PROF
}

Expand Down Expand Up @@ -194,7 +195,7 @@ class CodeGenLLVMCPU : public CodeGenLLVM {

FunctionType CodeGenCPU::codegen() {
TI_AUTO_PROF
return CodeGenLLVMCPU(kernel, ir).gen();
return CodeGenLLVMCPU(kernel, ir, needs_cache_).gen();
}

TLANG_NAMESPACE_END
6 changes: 5 additions & 1 deletion taichi/backends/cpu/codegen_cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,14 @@ TLANG_NAMESPACE_BEGIN

class CodeGenCPU : public KernelCodeGen {
public:
CodeGenCPU(Kernel *kernel, IRNode *ir = nullptr) : KernelCodeGen(kernel, ir) {
CodeGenCPU(Kernel *kernel, IRNode *ir = nullptr, bool needs_cache = false)
: KernelCodeGen(kernel, ir), needs_cache_(needs_cache) {
}

FunctionType codegen() override;

private:
bool needs_cache_{false};
};

TLANG_NAMESPACE_END
5 changes: 3 additions & 2 deletions taichi/codegen/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,11 @@ KernelCodeGen::KernelCodeGen(Kernel *kernel, IRNode *ir)

std::unique_ptr<KernelCodeGen> KernelCodeGen::create(Arch arch,
Kernel *kernel,
Stmt *stmt) {
Stmt *stmt,
bool needs_cache) {
#ifdef TI_WITH_LLVM
if (arch_is_cpu(arch) && arch != Arch::wasm) {
return std::make_unique<CodeGenCPU>(kernel, stmt);
return std::make_unique<CodeGenCPU>(kernel, stmt, needs_cache);
} else if (arch == Arch::wasm) {
return std::make_unique<CodeGenWASM>(kernel, stmt);
} else if (arch == Arch::cuda) {
Expand Down
3 changes: 2 additions & 1 deletion taichi/codegen/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ class KernelCodeGen {

static std::unique_ptr<KernelCodeGen> create(Arch arch,
Kernel *kernel,
Stmt *stmt = nullptr);
Stmt *stmt = nullptr,
bool needs_cache = false);

virtual FunctionType codegen() = 0;
};
Expand Down
14 changes: 13 additions & 1 deletion taichi/codegen/codegen_llvm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,8 @@ void CodeGenLLVM::emit_struct_meta_base(const std::string &name,

CodeGenLLVM::CodeGenLLVM(Kernel *kernel,
IRNode *ir,
std::unique_ptr<llvm::Module> &&module)
std::unique_ptr<llvm::Module> &&module,
bool needs_cache)
// TODO: simplify LLVMModuleBuilder ctor input
: LLVMModuleBuilder(
module == nullptr ? kernel->program->get_llvm_program_impl()
Expand All @@ -306,6 +307,7 @@ CodeGenLLVM::CodeGenLLVM(Kernel *kernel,
: std::move(module),
kernel->program->get_llvm_program_impl()->get_llvm_context(
kernel->arch)),
needs_cache_(needs_cache),
kernel(kernel),
ir(ir),
prog(kernel->program) {
Expand Down Expand Up @@ -2240,6 +2242,16 @@ FunctionType CodeGenLLVM::compile_module_to_executable() {
TI_AUTO_PROF
eliminate_unused_functions();

auto *llvm_prog = prog->get_llvm_program_impl();
if (needs_cache_) {
std::vector<std::string> offloaded_task_name_list;
for (auto &task : offloaded_tasks) {
offloaded_task_name_list.push_back(task.name);
}
llvm_prog->cache_kernel(this->kernel->get_key(), this->module.get(),
std::move(offloaded_task_name_list));
}

tlctx->add_module(std::move(module));

for (auto &task : offloaded_tasks) {
Expand Down
6 changes: 5 additions & 1 deletion taichi/codegen/codegen_llvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ class FunctionCreationGuard {
};

class CodeGenLLVM : public IRVisitor, public LLVMModuleBuilder {
private:
bool needs_cache_{false};

public:
Kernel *kernel;
IRNode *ir;
Expand Down Expand Up @@ -82,7 +85,8 @@ class CodeGenLLVM : public IRVisitor, public LLVMModuleBuilder {

CodeGenLLVM(Kernel *kernel,
IRNode *ir = nullptr,
std::unique_ptr<llvm::Module> &&module = nullptr);
std::unique_ptr<llvm::Module> &&module = nullptr,
bool needs_cache = false);

Arch current_arch() {
return kernel->arch;
Expand Down
89 changes: 89 additions & 0 deletions taichi/llvm/llvm_offline_cache.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#include "llvm_offline_cache.h"

#include <fstream>
#include <string>
#include <utility>
#include <vector>

#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_os_ostream.h"

namespace taichi {
namespace lang {

// Loads one cached kernel from disk into `res`.
//
// Two files are read, both produced by LlvmOfflineCacheFileWriter::dump():
//   <path_>/<key>.ll        textual LLVM IR of the kernel's module
//   <path_>/<key>_otnl.txt  offloaded task names, one per line
//
// Returns false (leaving `res` partially filled) if either file is missing
// or the IR fails to parse; returns true on success.
bool LlvmOfflineCacheFileReader::get_kernel_cache(
    LlvmOfflineCache::KernelCacheData &res,
    const std::string &key,
    llvm::LLVMContext &llvm_ctx) {
  res.kernel_key = key;
  const std::string filename_prefix = path_ + "/" + key;
  {
    // Parse the .ll file back into an llvm::Module owned by `res`.
    const std::string filename = filename_prefix + ".ll";
    llvm::SMDiagnostic err;
    res.owned_module = llvm::parseAssemblyFile(filename, err, llvm_ctx);
    res.module = res.owned_module.get();
    if (!res.module)
      return false;
  }
  {
    const std::string filename = filename_prefix + "_otnl.txt";
    std::ifstream in(filename, std::ios::in | std::ios::binary);
    if (!in.is_open())
      return false;
    // Stop at the first empty line; EOF also yields an empty read, so a
    // truncated or fully-consumed file terminates the loop naturally.
    std::string line;
    while (std::getline(in, line) && !line.empty()) {
      res.offloaded_task_name_list.push_back(std::move(line));
    }
  }
  return true;
}

void LlvmOfflineCacheFileWriter::dump() {
for (auto &[k, v] : data_.kernels) {
std::string filename_prefix = path_ + "/" + k;
{
std::string filename = filename_prefix + ".ll";
std::ofstream os(filename, std::ios::out | std::ios::binary);
TI_ERROR_IF(!os.is_open(), "File {} open failed", filename);
llvm::SMDiagnostic err;
llvm::LLVMContext ctx;
llvm::raw_os_ostream llvm_os(os);
if (v.module) {
mangle_offloaded_task_name(k, v.module, v.offloaded_task_name_list);
v.module->print(llvm_os, nullptr);
} else if (v.owned_module) {
mangle_offloaded_task_name(k, v.owned_module.get(),
v.offloaded_task_name_list);
v.owned_module->print(llvm_os, nullptr);
} else
TI_ASSERT(false);
}
{
std::string filename = filename_prefix + "_otnl.txt";
std::ofstream os(filename, std::ios::out | std::ios::binary);
TI_ERROR_IF(!os.is_open(), "File {} open failed", filename);
for (const auto &name : v.offloaded_task_name_list) {
os << name << '\n';
}
}
}
}

// Rewrites each offloaded task's function name to "<kernel_key><index>" so
// the on-disk names are unique per kernel and stable across runs.  Updates
// both the function in `module` and the entry in
// `offloaded_task_name_list` to the new name.  No-op once `mangled_` is set.
void LlvmOfflineCacheFileWriter::mangle_offloaded_task_name(
    const std::string &kernel_key,
    llvm::Module *module,
    std::vector<std::string> &offloaded_task_name_list) {
  if (mangled_)
    return;
  std::size_t task_index = 0;
  for (auto &task_name : offloaded_task_name_list) {
    const std::string new_name = kernel_key + std::to_string(task_index++);
    auto *fn = module->getFunction(task_name);
    TI_ASSERT(fn != nullptr);
    fn->setName(new_name);
    task_name = new_name;
  }
}

} // namespace lang
} // namespace taichi
69 changes: 69 additions & 0 deletions taichi/llvm/llvm_offline_cache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#pragma once

#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "taichi/common/core.h"
#include "taichi/llvm/llvm_fwd.h"
#include "taichi/util/io.h"

namespace taichi {
namespace lang {

// In-memory representation of the offline kernel cache: one entry per
// compiled kernel, keyed by the kernel's cache key string.
struct LlvmOfflineCache {
  struct KernelCacheData {
    // Cache key identifying this kernel (also used as the on-disk filename
    // prefix by the reader/writer classes).
    std::string kernel_key;
    // Owning pointer, set when this entry holds the module itself (e.g.
    // after loading from disk).
    std::unique_ptr<llvm::Module> owned_module{nullptr};
    // Non-owning view of the module; may alias owned_module.get() or point
    // at a module owned elsewhere.  At most one of the two is required.
    llvm::Module *module{nullptr};
    // Names of the kernel's offloaded task functions inside the module.
    std::vector<std::string> offloaded_task_name_list;

    // Move-only: owning a unique_ptr member implicitly deletes copying;
    // the defaults below make that explicit.
    KernelCacheData() = default;
    KernelCacheData(KernelCacheData &&) = default;
    KernelCacheData &operator=(KernelCacheData &&) = default;
    ~KernelCacheData() = default;
  };

  // All cached kernels, keyed by kernel_key.
  std::unordered_map<std::string, KernelCacheData> kernels;
};

// Loads kernel cache entries previously written by
// LlvmOfflineCacheFileWriter from the directory `path`.
class LlvmOfflineCacheFileReader {
 public:
  // `path`: directory containing the <key>.ll / <key>_otnl.txt file pairs.
  LlvmOfflineCacheFileReader(const std::string &path) : path_(path) {
  }

  // Fills `res` with the cached kernel identified by `key`, parsing its IR
  // into `llvm_ctx`.  Returns false if the files are missing or unparsable.
  bool get_kernel_cache(LlvmOfflineCache::KernelCacheData &res,
                        const std::string &key,
                        llvm::LLVMContext &llvm_ctx);

 private:
  std::string path_;  // cache directory
};

// Persists an LlvmOfflineCache to the directory `path`, one
// <key>.ll / <key>_otnl.txt file pair per kernel (see dump()).
class LlvmOfflineCacheFileWriter {
 public:
  // Creates the cache directory if it does not exist yet.
  LlvmOfflineCacheFileWriter(const std::string &path) : path_(path) {
    taichi::create_directories(path);
  }

  // Replaces the whole cache content.  Resets mangled_ because the incoming
  // data's task names have not been rewritten by this writer yet.
  void set_data(LlvmOfflineCache &&data) {
    this->mangled_ = false;
    this->data_ = std::move(data);
  }

  // Inserts (or overwrites) a single kernel entry under `key`.
  void add_kernel_cache(const std::string &key,
                        LlvmOfflineCache::KernelCacheData &&kernel_cache) {
    data_.kernels[key] = std::move(kernel_cache);
  }

  // Writes every kernel in data_ to disk, mangling task names first.
  void dump();

 private:
  // Renames each offloaded task function to "<kernel_key><index>" in both
  // `module` and `offloaded_task_name_list`; skipped when mangled_ is set.
  void mangle_offloaded_task_name(
      const std::string &kernel_key,
      llvm::Module *module,
      std::vector<std::string> &offloaded_task_name_list);

  std::string path_;        // target cache directory
  LlvmOfflineCache data_;   // kernels pending dump()
  bool mangled_{false};     // guards against re-mangling task names
};

} // namespace lang
} // namespace taichi
Loading