[cuda] Add argument "gpu_max_reg" to ti.init #2161

Merged · 3 commits · Jan 17, 2021
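The user-facing half of this change is a new optional keyword argument for ti.init. A minimal usage sketch — the register cap of 64 and the kernel below are illustrative, not taken from this PR:

    import taichi as ti

    # Ask the CUDA JIT to cap kernels at 64 registers per thread.
    # 0 (the default) keeps the CUDA driver's own register budget.
    ti.init(arch=ti.cuda, gpu_max_reg=64)

    x = ti.field(dtype=ti.f32, shape=1024)

    @ti.kernel
    def fill():
        for i in x:
            x[i] = i * 0.5

    fill()

Capping registers per thread can raise occupancy (more resident warps per SM) at the price of possible register spills, so the right value is workload-dependent — hence a knob rather than a hard-coded limit.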
Changes from all commits:
taichi/backends/cpu/jit_cpu.cpp (2 additions, 1 deletion)

@@ -126,7 +126,8 @@ class JITSessionCPU : public JITSession {
     return DL;
   }
 
-  JITModule *add_module(std::unique_ptr<llvm::Module> M) override {
+  JITModule *add_module(std::unique_ptr<llvm::Module> M, int max_reg) override {
+    TI_ASSERT(max_reg == 0);  // No need to specify max_reg on CPUs
     TI_ASSERT(M);
     global_optimize_module_cpu(M);
     std::lock_guard<std::mutex> _(mut);
taichi/backends/cuda/codegen_cuda.cpp (2 additions, 1 deletion)

@@ -41,7 +41,8 @@ class CodeGenLLVMCUDA : public CodeGenLLVM {
     }
 
     auto jit = kernel->program.llvm_context_device->jit.get();
-    auto cuda_module = jit->add_module(std::move(module));
+    auto cuda_module =
+        jit->add_module(std::move(module), kernel->program.config.gpu_max_reg);
 
     return [offloaded_local, cuda_module,
             kernel = this->kernel](Context &context) {
taichi/backends/cuda/cuda_driver.h (1 addition)

@@ -33,6 +33,7 @@ constexpr uint32 CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16;
 constexpr uint32 CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75;
 constexpr uint32 CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76;
 constexpr uint32 CUDA_ERROR_ASSERT = 710;
+constexpr uint32 CU_JIT_MAX_REGISTERS = 0;
 
 std::string get_cuda_error_message(uint32 err);
taichi/backends/cuda/jit_cuda.cpp (19 additions, 3 deletions)

@@ -86,7 +86,8 @@ class JITSessionCUDA : public JITSession {
       : data_layout(data_layout) {
   }
 
-  virtual JITModule *add_module(std::unique_ptr<llvm::Module> M) override {
+  virtual JITModule *add_module(std::unique_ptr<llvm::Module> M,
+                                int max_reg) override {
     auto ptx = compile_module_to_ptx(M);
     if (get_current_program().config.print_kernel_nvptx) {
       static FileSequenceWriter writer("taichi_kernel_nvptx_{:04d}.ptx",
@@ -103,8 +104,23 @@ class JITSessionCUDA : public JITSession {
     TI_TRACE("Loading module...");
     [[maybe_unused]] auto &&_ =
         std::move(CUDAContext::get_instance().get_lock_guard());
-    CUDADriver::get_instance().module_load_data_ex(&cuda_module, ptx.c_str(), 0,
-                                                   nullptr, nullptr);
+
+    constexpr int max_num_options = 8;
+    int num_options = 0;
+    uint32 options[max_num_options];
+    void *option_values[max_num_options];
+
+    // Insert options
+    if (max_reg != 0) {
+      options[num_options] = CU_JIT_MAX_REGISTERS;
+      option_values[num_options] = &max_reg;
+      num_options++;
+    }
+
+    TI_ASSERT(num_options <= max_num_options);
+
+    CUDADriver::get_instance().module_load_data_ex(
+        &cuda_module, ptx.c_str(), num_options, options, option_values);
     TI_TRACE("CUDA module load time : {}ms", (Time::get_time() - t) * 1000);
     // cudaModules.push_back(cudaModule);
     modules.push_back(std::make_unique<JITModuleCUDA>(cuda_module));
taichi/jit/jit_session.h (2 additions, 1 deletion)

@@ -19,7 +19,8 @@ class JITSession {
   JITSession() {
   }
 
-  virtual JITModule *add_module(std::unique_ptr<llvm::Module> M) = 0;
+  virtual JITModule *add_module(std::unique_ptr<llvm::Module> M,
+                                int max_reg = 0) = 0;
 
   // virtual void remove_module(JITModule *module) = 0;
taichi/program/compile_config.cpp (1 addition)

@@ -30,6 +30,7 @@ CompileConfig::CompileConfig() {
   kernel_profiler = false;
   default_cpu_block_dim = 32;
   default_gpu_block_dim = 128;
+  gpu_max_reg = 0;  // 0 means using the default value from the CUDA driver.
   verbose = true;
   fast_math = true;
   async_mode = false;
taichi/program/compile_config.h (1 addition)

@@ -39,6 +39,7 @@ struct CompileConfig {
   std::string extra_flags;
   int default_cpu_block_dim;
   int default_gpu_block_dim;
+  int gpu_max_reg;
   int ad_stack_size;
 
   int saturating_grid_dim;
taichi/python/export_lang.cpp (1 addition)

@@ -137,6 +137,7 @@ void export_lang(py::module &m) {
                      &CompileConfig::default_cpu_block_dim)
       .def_readwrite("default_gpu_block_dim",
                      &CompileConfig::default_gpu_block_dim)
+      .def_readwrite("gpu_max_reg", &CompileConfig::gpu_max_reg)
       .def_readwrite("saturating_grid_dim", &CompileConfig::saturating_grid_dim)
       .def_readwrite("max_block_dim", &CompileConfig::max_block_dim)
       .def_readwrite("cpu_max_num_threads", &CompileConfig::cpu_max_num_threads)