diff --git a/docs/contributor_guide.rst b/docs/contributor_guide.rst index da0ed5c6891e0..b32da3214a324 100644 --- a/docs/contributor_guide.rst +++ b/docs/contributor_guide.rst @@ -81,7 +81,7 @@ Please always prepend exactly one tag such as ``[Metal]`` to PR titles. For exam Existing tags: -- ``[Metal], [OpenGL], [CPU], [CUDA], [AMDGPU]``: backends; +- ``[Metal], [OpenGL], [CPU], [CUDA], [AMDGPU], [LLVM]``: backends; - ``[Lang]``: frontend language features, including syntax sugars; - ``[Std]``: standard library, e.g. `ti.Matrix` and `ti.Vector`; - ``[IR]``: intermediate representation; diff --git a/taichi/codegen/codegen_llvm.cpp b/taichi/codegen/codegen_llvm.cpp index e86891f33cbbd..03fdfb221a6d8 100644 --- a/taichi/codegen/codegen_llvm.cpp +++ b/taichi/codegen/codegen_llvm.cpp @@ -1362,7 +1362,6 @@ void CodeGenLLVM::create_offload_struct_for(OffloadedStmt *stmt, bool spmd) { builder->CreateBr(test_bb); builder->SetInsertPoint(after_loop); - builder->CreateRetVoid(); } if (stmt->block_dim == 0) { diff --git a/taichi/jit/jit_arch_cpu.cpp b/taichi/jit/jit_arch_cpu.cpp index b6408aa2ded37..de34d93e693e1 100644 --- a/taichi/jit/jit_arch_cpu.cpp +++ b/taichi/jit/jit_arch_cpu.cpp @@ -1,9 +1,8 @@ // A LLVM JIT compiler for CPU archs wrapper -#include -#include +#include + #include -#include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/ADT/StringRef.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JITSymbol.h" @@ -17,26 +16,30 @@ #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Verifier.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/IPO.h" -#include + #include "taichi/lang_util.h" #include "taichi/program/program.h" -#include "jit_session.h" +#include "taichi/jit/jit_session.h" // Based on // https://llvm.org/docs/tutorial/BuildingAJIT3.html -// (Note that + +// Note that // https://llvm.org/docs/tutorial/BuildingAJIT2.html // leads to a JIT that crashes all C++ exception after JIT session -// destruction.) +// destruction. TLANG_NAMESPACE_BEGIN @@ -200,6 +203,10 @@ class JITSessionCPU : public JITSession { static void global_optimize_module_cpu( std::unique_ptr &module) { TI_AUTO_PROF + if (llvm::verifyModule(*module, &llvm::errs())) { + module->print(llvm::errs(), nullptr); + TI_ERROR("Module broken"); + } auto JTMB = JITTargetMachineBuilder::detectHost(); if (!JTMB) { TI_ERROR("Target machine creation failed."); diff --git a/taichi/jit/jit_arch_cuda.cpp b/taichi/jit/jit_arch_cuda.cpp index 5b389f1039c79..4d700615ca669 100644 --- a/taichi/jit/jit_arch_cuda.cpp +++ b/taichi/jit/jit_arch_cuda.cpp @@ -1,32 +1,34 @@ - -#if defined(TI_WITH_CUDA) #include -#include +#if defined(TI_WITH_CUDA) #include -#include "taichi/backends/cuda/cuda_context.h" +#include #endif + #include "llvm/ADT/StringRef.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Verifier.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" -#include -#include -#include -#include +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" + #include "taichi/backends/cuda/cuda_utils.h" +#include "taichi/backends/cuda/cuda_context.h" #include "taichi/program/program.h" #include "taichi/runtime/llvm/context.h" #include "taichi/system/timer.h" #include "taichi/lang_util.h" -#include "jit_session.h" +#include "taichi/jit/jit_session.h" TLANG_NAMESPACE_BEGIN @@ -135,6 +137,14 @@ std::string convert(std::string new_name) { std::string JITSessionCUDA::compile_module_to_ptx( std::unique_ptr &module) { + /* + TODO: enabling this leads to LLVM error 'comdat global value has private linkage' + if (llvm::verifyModule(*module, &llvm::errs())) { + module->print(llvm::errs(), nullptr); + TI_ERROR("Module broken"); + } + */ + // Part of this function is borrowed from Halide::CodeGen_PTX_Dev.cpp using namespace llvm; diff --git a/taichi/jit/jit_module.h b/taichi/jit/jit_module.h index d185ae8e827cf..f1825986cdc8b 100644 --- a/taichi/jit/jit_module.h +++ b/taichi/jit/jit_module.h @@ -2,6 +2,7 @@ #include #include + #include "taichi/inc/constants.h" #include "taichi/llvm/llvm_fwd.h" #include "taichi/lang_util.h" diff --git a/taichi/jit/jit_session.h b/taichi/jit/jit_session.h index 0b45a71cde5f9..ee9a8afd3e6d9 100644 --- a/taichi/jit/jit_session.h +++ b/taichi/jit/jit_session.h @@ -2,9 +2,10 @@ #include #include + #include "taichi/llvm/llvm_fwd.h" #include "taichi/lang_util.h" -#include "jit_module.h" +#include "taichi/jit/jit_module.h" TLANG_NAMESPACE_BEGIN diff --git a/tests/python/test_struct_for.py b/tests/python/test_struct_for.py index 5c231e702a972..9cb333a504f64 100644 --- a/tests/python/test_struct_for.py +++ b/tests/python/test_struct_for.py @@ -227,3 +227,33 @@ def fill(): for i in range(n): assert x[i] == i + + +@ti.all_archs +def test_struct_for_branching(): + # Related issue: https://github.com/taichi-dev/taichi/issues/704 + x = ti.var(dt=ti.i32) + y = ti.var(dt=ti.i32) + ti.root.pointer(ti.ij, 128 // 4).dense(ti.ij, 4).place(x, y) + + @ti.kernel + def func1(): + for i, j in x: + if x[i, j] & 2 == 2: + y[i, j] = 1 + + @ti.kernel + def func2(): + for i, j in x: + if x[i, j] == 2 or x[i, j] == 4: + y[i, j] = 1 + + @ti.kernel + def func3(): + for i, j in x: + if x[i, j] & 2 == 2 or x[i, j] & 4 == 4: + y[i, j] = 1 + + func1() + func2() + func3()