diff --git a/taichi/ir/frontend_ir.cpp b/taichi/ir/frontend_ir.cpp index 3de97f000e316..04171fcf02de8 100644 --- a/taichi/ir/frontend_ir.cpp +++ b/taichi/ir/frontend_ir.cpp @@ -32,18 +32,18 @@ FrontendForStmt::FrontendForStmt(const ExprGroup &loop_var, : global_var(global_var) { vectorize = dec.vectorize; bit_vectorize = dec.bit_vectorize; - parallelize = dec.parallelize; + num_cpu_threads = dec.num_cpu_threads; strictly_serialized = dec.strictly_serialized; block_dim = dec.block_dim; auto cfg = get_current_program().config; if (cfg.arch == Arch::cuda) { vectorize = 1; - parallelize = 1; + num_cpu_threads = 1; TI_ASSERT(block_dim <= taichi_max_gpu_block_dim); } else { // cpu - if (parallelize == 0) - parallelize = std::thread::hardware_concurrency(); + if (num_cpu_threads == 0) + num_cpu_threads = std::thread::hardware_concurrency(); } mem_access_opt = dec.mem_access_opt; dec.reset(); @@ -69,16 +69,16 @@ FrontendForStmt::FrontendForStmt(const Expr &loop_var, : begin(begin), end(end) { vectorize = dec.vectorize; bit_vectorize = dec.bit_vectorize; - parallelize = dec.parallelize; + num_cpu_threads = dec.num_cpu_threads; strictly_serialized = dec.strictly_serialized; block_dim = dec.block_dim; auto cfg = get_current_program().config; if (cfg.arch == Arch::cuda) { vectorize = 1; - parallelize = 1; + num_cpu_threads = 1; } else { - if (parallelize == 0) - parallelize = std::thread::hardware_concurrency(); + if (num_cpu_threads == 0) + num_cpu_threads = std::thread::hardware_concurrency(); } mem_access_opt = dec.mem_access_opt; dec.reset(); diff --git a/taichi/ir/frontend_ir.h b/taichi/ir/frontend_ir.h index d532c8e02061e..2b410fda4c34a 100644 --- a/taichi/ir/frontend_ir.h +++ b/taichi/ir/frontend_ir.h @@ -123,7 +123,7 @@ class FrontendForStmt : public Stmt { std::vector loop_var_id; int vectorize; int bit_vectorize; - int parallelize; + int num_cpu_threads; bool strictly_serialized; MemoryAccessOptions mem_access_opt; int block_dim; diff --git a/taichi/ir/ir.cpp b/taichi/ir/ir.cpp index 82e2d8f7c89ce..586865419ead9 100644 --- a/taichi/ir/ir.cpp +++ b/taichi/ir/ir.cpp @@ -24,7 +24,7 @@ std::string snode_access_flag_name(SNodeAccessFlag type) { void DecoratorRecorder::reset() { vectorize = -1; bit_vectorize = -1; - parallelize = 0; + num_cpu_threads = 0; uniform = false; mem_access_opt.clear(); block_dim = 0; diff --git a/taichi/ir/ir.h b/taichi/ir/ir.h index 5d2c3eadfd7d5..4f75195df99d5 100644 --- a/taichi/ir/ir.h +++ b/taichi/ir/ir.h @@ -74,7 +74,7 @@ class DecoratorRecorder { public: int vectorize; int bit_vectorize; - int parallelize; + int num_cpu_threads; bool strictly_serialized; MemoryAccessOptions mem_access_opt; int block_dim; @@ -712,7 +712,7 @@ inline void BitVectorize(int v) { } inline void Parallelize(int v) { - dec.parallelize = v; + dec.num_cpu_threads = v; } inline void StrictlySerialize() { diff --git a/taichi/ir/ir_builder.cpp b/taichi/ir/ir_builder.cpp index 798972c5a4eb5..25ef6188555a7 100644 --- a/taichi/ir/ir_builder.cpp +++ b/taichi/ir/ir_builder.cpp @@ -48,22 +48,22 @@ RangeForStmt *IRBuilder::create_range_for(Stmt *begin, Stmt *end, int vectorize, int bit_vectorize, - int parallelize, + int num_cpu_threads, int block_dim, bool strictly_serialized) { return insert(Stmt::make_typed( begin, end, std::make_unique(), vectorize, bit_vectorize, - parallelize, block_dim, strictly_serialized)); + num_cpu_threads, block_dim, strictly_serialized)); } StructForStmt *IRBuilder::create_struct_for(SNode *snode, int vectorize, int bit_vectorize, - int parallelize, + int num_cpu_threads, int block_dim) { return insert(Stmt::make_typed( - snode, std::make_unique(), vectorize, bit_vectorize, parallelize, - block_dim)); + snode, std::make_unique(), vectorize, bit_vectorize, + num_cpu_threads, block_dim)); } WhileStmt *IRBuilder::create_while_true() { diff --git a/taichi/ir/ir_builder.h b/taichi/ir/ir_builder.h index c924eb8c3dd21..22ea0e50ca8ed 100644 --- a/taichi/ir/ir_builder.h +++ b/taichi/ir/ir_builder.h @@ -64,13 +64,13 @@ class IRBuilder { Stmt *end, int vectorize = -1, int bit_vectorize = -1, - int parallelize = 0, + int num_cpu_threads = 0, int block_dim = 0, bool strictly_serialized = false); StructForStmt *create_struct_for(SNode *snode, int vectorize = -1, int bit_vectorize = -1, - int parallelize = 0, + int num_cpu_threads = 0, int block_dim = 0); WhileStmt *create_while_true(); IfStmt *create_if(Stmt *cond); diff --git a/taichi/ir/statements.cpp b/taichi/ir/statements.cpp index 17911b864e259..7b8083c007d88 100644 --- a/taichi/ir/statements.cpp +++ b/taichi/ir/statements.cpp @@ -224,7 +224,7 @@ RangeForStmt::RangeForStmt(Stmt *begin, std::unique_ptr &&body, int vectorize, int bit_vectorize, - int parallelize, + int num_cpu_threads, int block_dim, bool strictly_serialized) : begin(begin), @@ -232,7 +232,7 @@ RangeForStmt::RangeForStmt(Stmt *begin, body(std::move(body)), vectorize(vectorize), bit_vectorize(bit_vectorize), - parallelize(parallelize), + num_cpu_threads(num_cpu_threads), block_dim(block_dim), strictly_serialized(strictly_serialized) { reversed = false; @@ -242,7 +242,7 @@ RangeForStmt::RangeForStmt(Stmt *begin, std::unique_ptr RangeForStmt::clone() const { auto new_stmt = std::make_unique( - begin, end, body->clone(), vectorize, bit_vectorize, parallelize, + begin, end, body->clone(), vectorize, bit_vectorize, num_cpu_threads, block_dim, strictly_serialized); new_stmt->reversed = reversed; return new_stmt; @@ -252,21 +252,22 @@ StructForStmt::StructForStmt(SNode *snode, std::unique_ptr &&body, int vectorize, int bit_vectorize, - int parallelize, + int num_cpu_threads, int block_dim) : snode(snode), body(std::move(body)), vectorize(vectorize), bit_vectorize(bit_vectorize), - parallelize(parallelize), + num_cpu_threads(num_cpu_threads), block_dim(block_dim) { this->body->parent_stmt = this; TI_STMT_REG_FIELDS; } std::unique_ptr StructForStmt::clone() const { - auto new_stmt = std::make_unique( - snode, body->clone(), vectorize, bit_vectorize, parallelize, block_dim); + auto new_stmt = std::make_unique(snode, body->clone(), + vectorize, bit_vectorize, + num_cpu_threads, block_dim); new_stmt->mem_access_opt = mem_access_opt; return new_stmt; } diff --git a/taichi/ir/statements.h b/taichi/ir/statements.h index 09ea4e37107fc..cd5c24717354e 100644 --- a/taichi/ir/statements.h +++ b/taichi/ir/statements.h @@ -551,7 +551,7 @@ class RangeForStmt : public Stmt { bool reversed; int vectorize; int bit_vectorize; - int parallelize; + int num_cpu_threads; int block_dim; bool strictly_serialized; @@ -560,7 +560,7 @@ class RangeForStmt : public Stmt { std::unique_ptr &&body, int vectorize, int bit_vectorize, - int parallelize, + int num_cpu_threads, int block_dim, bool strictly_serialized); @@ -579,7 +579,7 @@ class RangeForStmt : public Stmt { reversed, vectorize, bit_vectorize, - parallelize, + num_cpu_threads, block_dim, strictly_serialized); TI_DEFINE_ACCEPT @@ -595,7 +595,7 @@ class StructForStmt : public Stmt { std::vector index_offsets; int vectorize; int bit_vectorize; - int parallelize; + int num_cpu_threads; int block_dim; MemoryAccessOptions mem_access_opt; @@ -603,7 +603,7 @@ class StructForStmt : public Stmt { std::unique_ptr &&body, int vectorize, int bit_vectorize, - int parallelize, + int num_cpu_threads, int block_dim); bool is_container_statement() const override { @@ -616,7 +616,7 @@ class StructForStmt : public Stmt { index_offsets, vectorize, bit_vectorize, - parallelize, + num_cpu_threads, block_dim, mem_access_opt); TI_DEFINE_ACCEPT diff --git a/taichi/transforms/lower_ast.cpp b/taichi/transforms/lower_ast.cpp index c6f09ecffde09..42b0183bff76a 100644 --- a/taichi/transforms/lower_ast.cpp +++ b/taichi/transforms/lower_ast.cpp @@ -209,7 +209,7 @@ class LowerAST : public IRVisitor { if (is_good_range_for) { auto &&new_for = std::make_unique( begin->stmt, end->stmt, std::move(stmt->body), stmt->vectorize, - stmt->bit_vectorize, stmt->parallelize, stmt->block_dim, + stmt->bit_vectorize, stmt->num_cpu_threads, stmt->block_dim, stmt->strictly_serialized); new_for->body->insert(std::make_unique(new_for.get(), 0), 0); @@ -293,7 +293,7 @@ class LowerAST : public IRVisitor { auto &&new_for = std::make_unique( snode, std::move(stmt->body), stmt->vectorize, stmt->bit_vectorize, - stmt->parallelize, stmt->block_dim); + stmt->num_cpu_threads, stmt->block_dim); new_for->index_offsets = offsets; VecStatement new_statements; for (int i = 0; i < (int)stmt->loop_var_id.size(); i++) { diff --git a/taichi/transforms/offload.cpp b/taichi/transforms/offload.cpp index a4b3fa710ea68..e38867fc9ed96 100644 --- a/taichi/transforms/offload.cpp +++ b/taichi/transforms/offload.cpp @@ -79,7 +79,7 @@ class Offloader { std::make_pair(offloaded.get(), s->end)); } offloaded->num_cpu_threads = - std::min(s->parallelize, + std::min(s->num_cpu_threads, root->get_kernel()->program.config.cpu_max_num_threads); replace_all_usages_with(s, s, offloaded.get()); for (int j = 0; j < (int)s->body->statements.size(); j++) { @@ -181,8 +181,8 @@ class Offloader { } offloaded_struct_for->snode = for_stmt->snode; - offloaded_struct_for->num_cpu_threads = - std::min(for_stmt->parallelize, program->config.cpu_max_num_threads); + offloaded_struct_for->num_cpu_threads = std::min( + for_stmt->num_cpu_threads, program->config.cpu_max_num_threads); offloaded_struct_for->mem_access_opt = mem_access_opt; root_block->insert(std::move(offloaded_struct_for));