Skip to content

Commit

Permalink
[refactor] [ir] Rename "parallelize" to "num_cpu_threads" (#2243)
Browse files Browse the repository at this point in the history
  • Loading branch information
xumingkuan authored Apr 2, 2021
1 parent ce659ef commit c74fc2f
Show file tree
Hide file tree
Showing 10 changed files with 38 additions and 37 deletions.
16 changes: 8 additions & 8 deletions taichi/ir/frontend_ir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,18 @@ FrontendForStmt::FrontendForStmt(const ExprGroup &loop_var,
: global_var(global_var) {
vectorize = dec.vectorize;
bit_vectorize = dec.bit_vectorize;
parallelize = dec.parallelize;
num_cpu_threads = dec.num_cpu_threads;
strictly_serialized = dec.strictly_serialized;
block_dim = dec.block_dim;
auto cfg = get_current_program().config;
if (cfg.arch == Arch::cuda) {
vectorize = 1;
parallelize = 1;
num_cpu_threads = 1;
TI_ASSERT(block_dim <= taichi_max_gpu_block_dim);
} else {
// cpu
if (parallelize == 0)
parallelize = std::thread::hardware_concurrency();
if (num_cpu_threads == 0)
num_cpu_threads = std::thread::hardware_concurrency();
}
mem_access_opt = dec.mem_access_opt;
dec.reset();
Expand All @@ -69,16 +69,16 @@ FrontendForStmt::FrontendForStmt(const Expr &loop_var,
: begin(begin), end(end) {
vectorize = dec.vectorize;
bit_vectorize = dec.bit_vectorize;
parallelize = dec.parallelize;
num_cpu_threads = dec.num_cpu_threads;
strictly_serialized = dec.strictly_serialized;
block_dim = dec.block_dim;
auto cfg = get_current_program().config;
if (cfg.arch == Arch::cuda) {
vectorize = 1;
parallelize = 1;
num_cpu_threads = 1;
} else {
if (parallelize == 0)
parallelize = std::thread::hardware_concurrency();
if (num_cpu_threads == 0)
num_cpu_threads = std::thread::hardware_concurrency();
}
mem_access_opt = dec.mem_access_opt;
dec.reset();
Expand Down
2 changes: 1 addition & 1 deletion taichi/ir/frontend_ir.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ class FrontendForStmt : public Stmt {
std::vector<Identifier> loop_var_id;
int vectorize;
int bit_vectorize;
int parallelize;
int num_cpu_threads;
bool strictly_serialized;
MemoryAccessOptions mem_access_opt;
int block_dim;
Expand Down
2 changes: 1 addition & 1 deletion taichi/ir/ir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ std::string snode_access_flag_name(SNodeAccessFlag type) {
void DecoratorRecorder::reset() {
vectorize = -1;
bit_vectorize = -1;
parallelize = 0;
num_cpu_threads = 0;
uniform = false;
mem_access_opt.clear();
block_dim = 0;
Expand Down
4 changes: 2 additions & 2 deletions taichi/ir/ir.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class DecoratorRecorder {
public:
int vectorize;
int bit_vectorize;
int parallelize;
int num_cpu_threads;
bool strictly_serialized;
MemoryAccessOptions mem_access_opt;
int block_dim;
Expand Down Expand Up @@ -712,7 +712,7 @@ inline void BitVectorize(int v) {
}

// Stores the requested CPU thread count `v` on the decorator recorder `dec`.
// The FrontendForStmt constructors read this field and then call dec.reset();
// on non-CUDA archs a value of 0 is replaced there by
// std::thread::hardware_concurrency().
// NOTE: this is a diff view without +/- markers, so the assignment appears
// twice: `dec.parallelize` is the pre-rename line, `dec.num_cpu_threads` the
// post-rename line.
inline void Parallelize(int v) {
dec.parallelize = v;
dec.num_cpu_threads = v;
}

inline void StrictlySerialize() {
Expand Down
10 changes: 5 additions & 5 deletions taichi/ir/ir_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,22 +48,22 @@ RangeForStmt *IRBuilder::create_range_for(Stmt *begin,
Stmt *end,
int vectorize,
int bit_vectorize,
int parallelize,
int num_cpu_threads,
int block_dim,
bool strictly_serialized) {
return insert(Stmt::make_typed<RangeForStmt>(
begin, end, std::make_unique<Block>(), vectorize, bit_vectorize,
parallelize, block_dim, strictly_serialized));
num_cpu_threads, block_dim, strictly_serialized));
}

// Constructs a StructForStmt over `snode` with a newly created empty Block as
// its body, forwards the loop options unchanged, and returns whatever
// insert() returns for that statement.
// NOTE: this is a diff view without +/- markers; the `parallelize` parameter
// and argument lines are the pre-rename text, the `num_cpu_threads` lines are
// their replacements.
StructForStmt *IRBuilder::create_struct_for(SNode *snode,
int vectorize,
int bit_vectorize,
int parallelize,
int num_cpu_threads,
int block_dim) {
return insert(Stmt::make_typed<StructForStmt>(
snode, std::make_unique<Block>(), vectorize, bit_vectorize, parallelize,
block_dim));
snode, std::make_unique<Block>(), vectorize, bit_vectorize,
num_cpu_threads, block_dim));
}

WhileStmt *IRBuilder::create_while_true() {
Expand Down
4 changes: 2 additions & 2 deletions taichi/ir/ir_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,13 @@ class IRBuilder {
Stmt *end,
int vectorize = -1,
int bit_vectorize = -1,
int parallelize = 0,
int num_cpu_threads = 0,
int block_dim = 0,
bool strictly_serialized = false);
StructForStmt *create_struct_for(SNode *snode,
int vectorize = -1,
int bit_vectorize = -1,
int parallelize = 0,
int num_cpu_threads = 0,
int block_dim = 0);
WhileStmt *create_while_true();
IfStmt *create_if(Stmt *cond);
Expand Down
15 changes: 8 additions & 7 deletions taichi/ir/statements.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,15 +224,15 @@ RangeForStmt::RangeForStmt(Stmt *begin,
std::unique_ptr<Block> &&body,
int vectorize,
int bit_vectorize,
int parallelize,
int num_cpu_threads,
int block_dim,
bool strictly_serialized)
: begin(begin),
end(end),
body(std::move(body)),
vectorize(vectorize),
bit_vectorize(bit_vectorize),
parallelize(parallelize),
num_cpu_threads(num_cpu_threads),
block_dim(block_dim),
strictly_serialized(strictly_serialized) {
reversed = false;
Expand All @@ -242,7 +242,7 @@ RangeForStmt::RangeForStmt(Stmt *begin,

// Returns a new RangeForStmt that reuses the same `begin`/`end` statement
// pointers, owns a clone of the body, and carries the same loop options.
// `reversed` is copied after construction because the constructor takes no
// such argument and sets it to false.
// NOTE: this is a diff view without +/- markers; the argument line ending in
// `parallelize,` is the pre-rename text and the one ending in
// `num_cpu_threads,` is its replacement.
std::unique_ptr<Stmt> RangeForStmt::clone() const {
auto new_stmt = std::make_unique<RangeForStmt>(
begin, end, body->clone(), vectorize, bit_vectorize, parallelize,
begin, end, body->clone(), vectorize, bit_vectorize, num_cpu_threads,
block_dim, strictly_serialized);
new_stmt->reversed = reversed;
return new_stmt;
}
Expand All @@ -252,21 +252,22 @@ StructForStmt::StructForStmt(SNode *snode,
std::unique_ptr<Block> &&body,
int vectorize,
int bit_vectorize,
int parallelize,
int num_cpu_threads,
int block_dim)
: snode(snode),
body(std::move(body)),
vectorize(vectorize),
bit_vectorize(bit_vectorize),
parallelize(parallelize),
num_cpu_threads(num_cpu_threads),
block_dim(block_dim) {
this->body->parent_stmt = this;
TI_STMT_REG_FIELDS;
}

// Returns a new StructForStmt over the same `snode` that owns a clone of the
// body and carries the same loop options. `mem_access_opt` is copied after
// construction because it is not a constructor argument.
// NOTE(review): the `index_offsets` member is not copied here; confirm
// whether that is intentional.
// NOTE: this is a diff view without +/- markers; the first make_unique call
// (passing `parallelize`) is the pre-rename text and the second (passing
// `num_cpu_threads`) is its replacement.
std::unique_ptr<Stmt> StructForStmt::clone() const {
auto new_stmt = std::make_unique<StructForStmt>(
snode, body->clone(), vectorize, bit_vectorize, parallelize, block_dim);
auto new_stmt = std::make_unique<StructForStmt>(snode, body->clone(),
vectorize, bit_vectorize,
num_cpu_threads, block_dim);
new_stmt->mem_access_opt = mem_access_opt;
return new_stmt;
}
Expand Down
12 changes: 6 additions & 6 deletions taichi/ir/statements.h
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,7 @@ class RangeForStmt : public Stmt {
bool reversed;
int vectorize;
int bit_vectorize;
int parallelize;
int num_cpu_threads;
int block_dim;
bool strictly_serialized;

Expand All @@ -560,7 +560,7 @@ class RangeForStmt : public Stmt {
std::unique_ptr<Block> &&body,
int vectorize,
int bit_vectorize,
int parallelize,
int num_cpu_threads,
int block_dim,
bool strictly_serialized);

Expand All @@ -579,7 +579,7 @@ class RangeForStmt : public Stmt {
reversed,
vectorize,
bit_vectorize,
parallelize,
num_cpu_threads,
block_dim,
strictly_serialized);
TI_DEFINE_ACCEPT
Expand All @@ -595,15 +595,15 @@ class StructForStmt : public Stmt {
std::vector<int> index_offsets;
int vectorize;
int bit_vectorize;
int parallelize;
int num_cpu_threads;
int block_dim;
MemoryAccessOptions mem_access_opt;

StructForStmt(SNode *snode,
std::unique_ptr<Block> &&body,
int vectorize,
int bit_vectorize,
int parallelize,
int num_cpu_threads,
int block_dim);

bool is_container_statement() const override {
Expand All @@ -616,7 +616,7 @@ class StructForStmt : public Stmt {
index_offsets,
vectorize,
bit_vectorize,
parallelize,
num_cpu_threads,
block_dim,
mem_access_opt);
TI_DEFINE_ACCEPT
Expand Down
4 changes: 2 additions & 2 deletions taichi/transforms/lower_ast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ class LowerAST : public IRVisitor {
if (is_good_range_for) {
auto &&new_for = std::make_unique<RangeForStmt>(
begin->stmt, end->stmt, std::move(stmt->body), stmt->vectorize,
stmt->bit_vectorize, stmt->parallelize, stmt->block_dim,
stmt->bit_vectorize, stmt->num_cpu_threads, stmt->block_dim,
stmt->strictly_serialized);
new_for->body->insert(std::make_unique<LoopIndexStmt>(new_for.get(), 0),
0);
Expand Down Expand Up @@ -293,7 +293,7 @@ class LowerAST : public IRVisitor {

auto &&new_for = std::make_unique<StructForStmt>(
snode, std::move(stmt->body), stmt->vectorize, stmt->bit_vectorize,
stmt->parallelize, stmt->block_dim);
stmt->num_cpu_threads, stmt->block_dim);
new_for->index_offsets = offsets;
VecStatement new_statements;
for (int i = 0; i < (int)stmt->loop_var_id.size(); i++) {
Expand Down
6 changes: 3 additions & 3 deletions taichi/transforms/offload.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class Offloader {
std::make_pair(offloaded.get(), s->end));
}
offloaded->num_cpu_threads =
std::min(s->parallelize,
std::min(s->num_cpu_threads,
root->get_kernel()->program.config.cpu_max_num_threads);
replace_all_usages_with(s, s, offloaded.get());
for (int j = 0; j < (int)s->body->statements.size(); j++) {
Expand Down Expand Up @@ -181,8 +181,8 @@ class Offloader {
}

offloaded_struct_for->snode = for_stmt->snode;
offloaded_struct_for->num_cpu_threads =
std::min(for_stmt->parallelize, program->config.cpu_max_num_threads);
offloaded_struct_for->num_cpu_threads = std::min(
for_stmt->num_cpu_threads, program->config.cpu_max_num_threads);
offloaded_struct_for->mem_access_opt = mem_access_opt;

root_block->insert(std::move(offloaded_struct_for));
Expand Down

0 comments on commit c74fc2f

Please sign in to comment.