[async] Support constant folding in async mode #1778

Merged · 4 commits · Aug 26, 2020
Changes from 1 commit:
finalize
yuanming-hu committed Aug 26, 2020

Verified: this commit was created on GitHub.com and signed with GitHub's verified signature (the key has since expired).
commit 2bd534a58aae05c3812cc33d7e4dd4305e99fd18
2 changes: 1 addition & 1 deletion taichi/program/async_engine.cpp
```diff
@@ -175,7 +175,7 @@ void ExecutionQueue::enqueue(KernelLaunchRecord &&ker) {
   auto config = kernel->program.config;
   auto ir = stmt;
   offload_to_executable(
-      ir, config, /*verbose=*/config.print_ir,
+      ir, config, /*verbose=*/false,
       /*lower_global_access=*/true,
       /*make_thread_local=*/true,
       /*make_block_local=*/
```
2 changes: 1 addition & 1 deletion taichi/program/program.cpp
```diff
@@ -630,7 +630,7 @@ Kernel &Program::get_snode_writer(SNode *snode) {
 }
 
 uint64 Program::fetch_result_uint64(int i) {
-  // Precondition: caller must have already done a program synchronization.
+  device_synchronize();
   uint64 ret;
   auto arch = config.arch;
   if (arch == Arch::cuda) {
```
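
With the synchronization folded into the fetch itself, a caller can no longer race with in-flight device work when reading a result. A minimal sketch of the idea, with a worker thread standing in for the device (none of this is Taichi's actual implementation):

```cpp
#include <cstdint>
#include <thread>

// Sketch: the fetch synchronizes internally, so callers can never read
// the result buffer while a launched "kernel" is still writing to it.
struct DeviceSketch {
  uint64_t result_buffer[1] = {0};
  std::thread worker;

  void launch_async() {
    // Simulates an asynchronously launched kernel writing its result.
    worker = std::thread([this] { result_buffer[0] = 42; });
  }

  void device_synchronize() {
    if (worker.joinable())
      worker.join();  // wait for in-flight work, like the sync in the diff
  }

  uint64_t fetch_result_uint64(int i) {
    device_synchronize();  // sync first, then read: no stale result
    return result_buffer[i];
  }
};

int main() {
  DeviceSketch dev;
  dev.launch_async();
  return dev.fetch_result_uint64(0) == 42 ? 0 : 1;  // reliably 42
}
```
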
3 changes: 2 additions & 1 deletion taichi/program/program.h
```diff
@@ -135,9 +135,11 @@ class Program {
     context.runtime = (LLVMRuntime *)llvm_runtime;
     return context;
   }
 
   void initialize_device_llvm_context();
 
   void synchronize();
 
+  void device_synchronize();
+
   void layout(std::function<void()> func) {
@@ -258,7 +260,6 @@ class Program {
     auto runtime = tlctx->runtime_jit_module;
     runtime->call<void *, Args...>("runtime_" + key, llvm_runtime,
                                    std::forward<Args>(args)...);
-    device_synchronize();
     return fetch_result<T>(taichi_result_buffer_runtime_query_id);
   }
```
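
The header now declares device_synchronize() alongside synchronize(). As far as this diff shows, synchronize() remains the full program-level sync, while device_synchronize() only waits for work already launched on the device, presumably without flushing the async task queue (which matters when a fold kernel runs during compilation). A rough sketch of that assumed layering, with illustrative bodies that are not Taichi's code:

```cpp
#include <functional>
#include <queue>

// Assumed layering: synchronize() drains pending async tasks and then
// waits on the device; device_synchronize() only does the latter.
struct ProgramSketch {
  std::queue<std::function<void()>> async_queue;  // pending kernel launches

  void device_synchronize() {
    // e.g. a stream synchronize on CUDA; a no-op on host backends
  }

  void synchronize() {
    while (!async_queue.empty()) {  // flush queued launches first
      async_queue.front()();
      async_queue.pop();
    }
    device_synchronize();  // then wait for the device itself
  }
};

int main() {
  ProgramSketch prog;
  prog.async_queue.push([] { /* a queued kernel launch */ });
  prog.synchronize();  // drains the queue, then waits on the device
  return static_cast<int>(prog.async_queue.size());  // 0 after sync
}
```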

6 changes: 0 additions & 6 deletions taichi/transforms/constant_fold.cpp
```diff
@@ -121,9 +121,6 @@ class ConstantFold : public BasicStmtVisitor {
     launch_ctx.set_arg_raw(0, lhs.val_u64);
     launch_ctx.set_arg_raw(1, rhs.val_u64);
     (*ker)(launch_ctx);
-    // Constant folding kernel is always run in sync mode, therefore we call
-    // device_synchronize().
-    current_program.device_synchronize();
     ret.val_i64 = current_program.fetch_result<int64_t>(0);
     return true;
   }
@@ -146,9 +143,6 @@ class ConstantFold : public BasicStmtVisitor {
     auto launch_ctx = ker->make_launch_context();
     launch_ctx.set_arg_raw(0, operand.val_u64);
     (*ker)(launch_ctx);
-    // Constant folding kernel is always run in sync mode, therefore we call
-    // device_synchronize().
-    current_program.device_synchronize();
     ret.val_i64 = current_program.fetch_result<int64_t>(0);
     return true;
   }
```
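
Only the explicit sync disappears from these call sites; the launch-then-fetch flow is unchanged, since fetch_result() now synchronizes internally. A toy version of the fold-by-execution shape, with the JIT kernel replaced by a plain function (all names here are hypothetical):

```cpp
#include <cstdint>
#include <cstdio>

// Toy fold-by-execution: evaluate a binary op by "launching" a tiny
// evaluator with raw 64-bit arguments and reading the result back from
// a buffer, mirroring the launch/fetch_result flow in the diff above.
static uint64_t result_buffer[1];

static void add_kernel(uint64_t lhs_raw, uint64_t rhs_raw) {
  auto lhs = static_cast<int64_t>(lhs_raw);
  auto rhs = static_cast<int64_t>(rhs_raw);
  result_buffer[0] = static_cast<uint64_t>(lhs + rhs);
}

int main() {
  add_kernel(/*lhs=*/3, /*rhs=*/4);  // stands in for (*ker)(launch_ctx)
  auto folded = static_cast<int64_t>(result_buffer[0]);  // fetch_result<int64_t>(0)
  std::printf("folded constant: %lld\n", static_cast<long long>(folded));
  return 0;
}
```
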
2 changes: 1 addition & 1 deletion tests/python/test_constant_fold.py
```diff
@@ -1,7 +1,7 @@
 import taichi as ti
 
 
-@ti.test(arch=ti.cpu, async_mode=True)
+@ti.test(require=ti.extension.async_mode, async_mode=True)
 def test_constant_fold():
     n = 100
```
23 changes: 0 additions & 23 deletions tmp.py

This file was deleted.