Skip to content

Commit

Permalink
[llvm] [refactor] Merge create_call and call (#6192)
Browse files Browse the repository at this point in the history
Issue: #3382
Currently, `create_call` in `TaskCodeGenLLVM` and `call` in
`LLVMModuleBuilder` do the same thing, so they should be merged.
### Brief Summary

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
lin-hitonami and pre-commit-ci[bot] authored Sep 29, 2022
1 parent 94bda43 commit 9d65cbe
Show file tree
Hide file tree
Showing 7 changed files with 164 additions and 178 deletions.
24 changes: 11 additions & 13 deletions taichi/codegen/cpu/codegen_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,10 @@ class TaskCodeGenCPU : public TaskCodeGenLLVM {
stmt->block_dim = std::min(1024, std::max(512, items_per_thread));
}

create_call(
"cpu_parallel_range_for",
{get_arg(0), tlctx->get_constant(stmt->num_cpu_threads), begin, end,
call("cpu_parallel_range_for", get_arg(0),
tlctx->get_constant(stmt->num_cpu_threads), begin, end,
tlctx->get_constant(step), tlctx->get_constant(stmt->block_dim),
tls_prologue, body, epilogue, tlctx->get_constant(stmt->tls_size)});
tls_prologue, body, epilogue, tlctx->get_constant(stmt->tls_size));
}

void create_offload_mesh_for(OffloadedStmt *stmt) override {
Expand Down Expand Up @@ -147,11 +146,11 @@ class TaskCodeGenCPU : public TaskCodeGenLLVM {

llvm::Value *epilogue = create_mesh_xlogue(stmt->tls_epilogue);

create_call("cpu_parallel_mesh_for",
{get_arg(0), tlctx->get_constant(stmt->num_cpu_threads),
tlctx->get_constant(stmt->mesh->num_patches),
tlctx->get_constant(stmt->block_dim), tls_prologue, body,
epilogue, tlctx->get_constant(stmt->tls_size)});
call("cpu_parallel_mesh_for", get_arg(0),
tlctx->get_constant(stmt->num_cpu_threads),
tlctx->get_constant(stmt->mesh->num_patches),
tlctx->get_constant(stmt->block_dim), tls_prologue, body, epilogue,
tlctx->get_constant(stmt->tls_size));
}

void create_bls_buffer(OffloadedStmt *stmt) {
Expand Down Expand Up @@ -179,9 +178,8 @@ class TaskCodeGenCPU : public TaskCodeGenLLVM {
auto offloaded_task_name = init_offloaded_task_function(stmt);
if (prog->this_thread_config().kernel_profiler &&
arch_is_cpu(prog->this_thread_config().arch)) {
call(
builder.get(), "LLVMRuntime_profiler_start",
{get_runtime(), builder->CreateGlobalStringPtr(offloaded_task_name)});
call("LLVMRuntime_profiler_start", get_runtime(),
builder->CreateGlobalStringPtr(offloaded_task_name));
}
if (stmt->task_type == Type::serial) {
stmt->body->accept(this);
Expand All @@ -204,7 +202,7 @@ class TaskCodeGenCPU : public TaskCodeGenLLVM {
arch_is_cpu(prog->this_thread_config().arch)) {
llvm::IRBuilderBase::InsertPointGuard guard(*builder);
builder->SetInsertPoint(final_block);
call(builder.get(), "LLVMRuntime_profiler_stop", {get_runtime()});
call("LLVMRuntime_profiler_stop", get_runtime());
}
finalize_offloaded_task_function();
offloaded_tasks.push_back(*current_task);
Expand Down
50 changes: 24 additions & 26 deletions taichi/codegen/cuda/codegen_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,38 +156,38 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
#define UNARY_STD(x) \
else if (op == UnaryOpType::x) { \
if (input_taichi_type->is_primitive(PrimitiveTypeID::f32)) { \
llvm_val[stmt] = create_call("__nv_" #x "f", input); \
llvm_val[stmt] = call("__nv_" #x "f", input); \
} else if (input_taichi_type->is_primitive(PrimitiveTypeID::f64)) { \
llvm_val[stmt] = create_call("__nv_" #x, input); \
llvm_val[stmt] = call("__nv_" #x, input); \
} else if (input_taichi_type->is_primitive(PrimitiveTypeID::i32)) { \
llvm_val[stmt] = create_call(#x, input); \
llvm_val[stmt] = call(#x, input); \
} else { \
TI_NOT_IMPLEMENTED \
} \
}
if (op == UnaryOpType::abs) {
if (input_taichi_type->is_primitive(PrimitiveTypeID::f32)) {
llvm_val[stmt] = create_call("__nv_fabsf", input);
llvm_val[stmt] = call("__nv_fabsf", input);
} else if (input_taichi_type->is_primitive(PrimitiveTypeID::f64)) {
llvm_val[stmt] = create_call("__nv_fabs", input);
llvm_val[stmt] = call("__nv_fabs", input);
} else if (input_taichi_type->is_primitive(PrimitiveTypeID::i32)) {
llvm_val[stmt] = create_call("__nv_abs", input);
llvm_val[stmt] = call("__nv_abs", input);
} else if (input_taichi_type->is_primitive(PrimitiveTypeID::i64)) {
llvm_val[stmt] = create_call("__nv_llabs", input);
llvm_val[stmt] = call("__nv_llabs", input);
} else {
TI_NOT_IMPLEMENTED
}
} else if (op == UnaryOpType::sqrt) {
if (input_taichi_type->is_primitive(PrimitiveTypeID::f32)) {
llvm_val[stmt] = create_call("__nv_sqrtf", input);
llvm_val[stmt] = call("__nv_sqrtf", input);
} else if (input_taichi_type->is_primitive(PrimitiveTypeID::f64)) {
llvm_val[stmt] = create_call("__nv_sqrt", input);
llvm_val[stmt] = call("__nv_sqrt", input);
} else {
TI_NOT_IMPLEMENTED
}
} else if (op == UnaryOpType::logic_not) {
if (input_taichi_type->is_primitive(PrimitiveTypeID::i32)) {
llvm_val[stmt] = create_call("logic_not_i32", input);
llvm_val[stmt] = call("logic_not_i32", input);
} else {
TI_NOT_IMPLEMENTED
}
Expand Down Expand Up @@ -247,8 +247,8 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
}
TI_ASSERT(fast_reductions.at(prim_type).find(op) !=
fast_reductions.at(prim_type).end());
return create_call(fast_reductions.at(prim_type).at(op),
{llvm_val[stmt->dest], llvm_val[stmt->val]});
return call(fast_reductions.at(prim_type).at(op), llvm_val[stmt->dest],
llvm_val[stmt->val]);
}

// LLVM15 already support f16 atomic in
Expand Down Expand Up @@ -425,9 +425,8 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
auto epilogue = create_xlogue(stmt->tls_epilogue);

auto [begin, end] = get_range_for_bounds(stmt);
create_call("gpu_parallel_range_for",
{get_arg(0), begin, end, tls_prologue, body, epilogue,
tlctx->get_constant(stmt->tls_size)});
call("gpu_parallel_range_for", get_arg(0), begin, end, tls_prologue, body,
epilogue, tlctx->get_constant(stmt->tls_size));
}

void create_offload_mesh_for(OffloadedStmt *stmt) override {
Expand Down Expand Up @@ -506,10 +505,9 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {

auto tls_epilogue = create_mesh_xlogue(stmt->tls_epilogue);

create_call(
"gpu_parallel_mesh_for",
{get_arg(0), tlctx->get_constant(stmt->mesh->num_patches), tls_prologue,
body, tls_epilogue, tlctx->get_constant(stmt->tls_size)});
call("gpu_parallel_mesh_for", get_arg(0),
tlctx->get_constant(stmt->mesh->num_patches), tls_prologue, body,
tls_epilogue, tlctx->get_constant(stmt->tls_size));
}

void emit_cuda_gc(OffloadedStmt *stmt) {
Expand Down Expand Up @@ -648,9 +646,9 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
void visit(ExternalTensorShapeAlongAxisStmt *stmt) override {
const auto arg_id = stmt->arg_id;
const auto axis = stmt->axis;
llvm_val[stmt] = create_call("RuntimeContext_get_extra_args",
{get_context(), tlctx->get_constant(arg_id),
tlctx->get_constant(axis)});
llvm_val[stmt] =
call("RuntimeContext_get_extra_args", get_context(),
tlctx->get_constant(arg_id), tlctx->get_constant(axis));
}

void visit(BinaryOpStmt *stmt) override {
Expand Down Expand Up @@ -679,9 +677,9 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {

if (op == BinaryOpType::atan2) {
if (ret_type->is_primitive(PrimitiveTypeID::f32)) {
llvm_val[stmt] = create_call("__nv_atan2f", {lhs, rhs});
llvm_val[stmt] = call("__nv_atan2f", lhs, rhs);
} else if (ret_type->is_primitive(PrimitiveTypeID::f64)) {
llvm_val[stmt] = create_call("__nv_atan2", {lhs, rhs});
llvm_val[stmt] = call("__nv_atan2", lhs, rhs);
} else {
TI_P(data_type_name(ret_type));
TI_NOT_IMPLEMENTED
Expand All @@ -690,9 +688,9 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
// Note that ret_type here cannot be integral because pow with an
// integral exponent has been demoted in the demote_operations pass
if (ret_type->is_primitive(PrimitiveTypeID::f32)) {
llvm_val[stmt] = create_call("__nv_powf", {lhs, rhs});
llvm_val[stmt] = call("__nv_powf", lhs, rhs);
} else if (ret_type->is_primitive(PrimitiveTypeID::f64)) {
llvm_val[stmt] = create_call("__nv_pow", {lhs, rhs});
llvm_val[stmt] = call("__nv_pow", lhs, rhs);
} else {
TI_P(data_type_name(ret_type));
TI_NOT_IMPLEMENTED
Expand Down
12 changes: 5 additions & 7 deletions taichi/codegen/dx12/codegen_dx12.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,8 @@ class TaskCodeGenLLVMDX12 : public TaskCodeGenLLVM {
auto epilogue = create_xlogue(stmt->tls_epilogue);

auto [begin, end] = get_range_for_bounds(stmt);
create_call("gpu_parallel_range_for",
{get_arg(0), begin, end, tls_prologue, body, epilogue,
tlctx->get_constant(stmt->tls_size)});
call("gpu_parallel_range_for", get_arg(0), begin, end, tls_prologue, body,
epilogue, tlctx->get_constant(stmt->tls_size));
}

void create_offload_mesh_for(OffloadedStmt *stmt) override {
Expand Down Expand Up @@ -131,10 +130,9 @@ class TaskCodeGenLLVMDX12 : public TaskCodeGenLLVM {

auto tls_epilogue = create_mesh_xlogue(stmt->tls_epilogue);

create_call(
"gpu_parallel_mesh_for",
{get_arg(0), tlctx->get_constant(stmt->mesh->num_patches), tls_prologue,
body, tls_epilogue, tlctx->get_constant(stmt->tls_size)});
call("gpu_parallel_mesh_for", get_arg(0),
tlctx->get_constant(stmt->mesh->num_patches), tls_prologue, body,
tls_epilogue, tlctx->get_constant(stmt->tls_size));
}

void create_bls_buffer(OffloadedStmt *stmt) {
Expand Down
Loading

0 comments on commit 9d65cbe

Please sign in to comment.