Skip to content

Commit

Permalink
[llvm] [refactor] Merge create_call and call (#6192)
Browse files Browse the repository at this point in the history
Issue: #3382
Currently, `create_call` in `TaskCodeGenLLVM` and `call` in
`LLVMModuleBuilder` do the same thing, so they should be merged.
### Brief Summary

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
lin-hitonami and pre-commit-ci[bot] authored Sep 29, 2022
1 parent 94bda43 commit 9d65cbe
Show file tree
Hide file tree
Showing 7 changed files with 164 additions and 178 deletions.
24 changes: 11 additions & 13 deletions taichi/codegen/cpu/codegen_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,10 @@ class TaskCodeGenCPU : public TaskCodeGenLLVM {
stmt->block_dim = std::min(1024, std::max(512, items_per_thread));
}

create_call(
"cpu_parallel_range_for",
{get_arg(0), tlctx->get_constant(stmt->num_cpu_threads), begin, end,
call("cpu_parallel_range_for", get_arg(0),
tlctx->get_constant(stmt->num_cpu_threads), begin, end,
tlctx->get_constant(step), tlctx->get_constant(stmt->block_dim),
tls_prologue, body, epilogue, tlctx->get_constant(stmt->tls_size)});
tls_prologue, body, epilogue, tlctx->get_constant(stmt->tls_size));
}

void create_offload_mesh_for(OffloadedStmt *stmt) override {
Expand Down Expand Up @@ -147,11 +146,11 @@ class TaskCodeGenCPU : public TaskCodeGenLLVM {

llvm::Value *epilogue = create_mesh_xlogue(stmt->tls_epilogue);

create_call("cpu_parallel_mesh_for",
{get_arg(0), tlctx->get_constant(stmt->num_cpu_threads),
tlctx->get_constant(stmt->mesh->num_patches),
tlctx->get_constant(stmt->block_dim), tls_prologue, body,
epilogue, tlctx->get_constant(stmt->tls_size)});
call("cpu_parallel_mesh_for", get_arg(0),
tlctx->get_constant(stmt->num_cpu_threads),
tlctx->get_constant(stmt->mesh->num_patches),
tlctx->get_constant(stmt->block_dim), tls_prologue, body, epilogue,
tlctx->get_constant(stmt->tls_size));
}

void create_bls_buffer(OffloadedStmt *stmt) {
Expand Down Expand Up @@ -179,9 +178,8 @@ class TaskCodeGenCPU : public TaskCodeGenLLVM {
auto offloaded_task_name = init_offloaded_task_function(stmt);
if (prog->this_thread_config().kernel_profiler &&
arch_is_cpu(prog->this_thread_config().arch)) {
call(
builder.get(), "LLVMRuntime_profiler_start",
{get_runtime(), builder->CreateGlobalStringPtr(offloaded_task_name)});
call("LLVMRuntime_profiler_start", get_runtime(),
builder->CreateGlobalStringPtr(offloaded_task_name));
}
if (stmt->task_type == Type::serial) {
stmt->body->accept(this);
Expand All @@ -204,7 +202,7 @@ class TaskCodeGenCPU : public TaskCodeGenLLVM {
arch_is_cpu(prog->this_thread_config().arch)) {
llvm::IRBuilderBase::InsertPointGuard guard(*builder);
builder->SetInsertPoint(final_block);
call(builder.get(), "LLVMRuntime_profiler_stop", {get_runtime()});
call("LLVMRuntime_profiler_stop", get_runtime());
}
finalize_offloaded_task_function();
offloaded_tasks.push_back(*current_task);
Expand Down
50 changes: 24 additions & 26 deletions taichi/codegen/cuda/codegen_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,38 +156,38 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
#define UNARY_STD(x) \
else if (op == UnaryOpType::x) { \
if (input_taichi_type->is_primitive(PrimitiveTypeID::f32)) { \
llvm_val[stmt] = create_call("__nv_" #x "f", input); \
llvm_val[stmt] = call("__nv_" #x "f", input); \
} else if (input_taichi_type->is_primitive(PrimitiveTypeID::f64)) { \
llvm_val[stmt] = create_call("__nv_" #x, input); \
llvm_val[stmt] = call("__nv_" #x, input); \
} else if (input_taichi_type->is_primitive(PrimitiveTypeID::i32)) { \
llvm_val[stmt] = create_call(#x, input); \
llvm_val[stmt] = call(#x, input); \
} else { \
TI_NOT_IMPLEMENTED \
} \
}
if (op == UnaryOpType::abs) {
if (input_taichi_type->is_primitive(PrimitiveTypeID::f32)) {
llvm_val[stmt] = create_call("__nv_fabsf", input);
llvm_val[stmt] = call("__nv_fabsf", input);
} else if (input_taichi_type->is_primitive(PrimitiveTypeID::f64)) {
llvm_val[stmt] = create_call("__nv_fabs", input);
llvm_val[stmt] = call("__nv_fabs", input);
} else if (input_taichi_type->is_primitive(PrimitiveTypeID::i32)) {
llvm_val[stmt] = create_call("__nv_abs", input);
llvm_val[stmt] = call("__nv_abs", input);
} else if (input_taichi_type->is_primitive(PrimitiveTypeID::i64)) {
llvm_val[stmt] = create_call("__nv_llabs", input);
llvm_val[stmt] = call("__nv_llabs", input);
} else {
TI_NOT_IMPLEMENTED
}
} else if (op == UnaryOpType::sqrt) {
if (input_taichi_type->is_primitive(PrimitiveTypeID::f32)) {
llvm_val[stmt] = create_call("__nv_sqrtf", input);
llvm_val[stmt] = call("__nv_sqrtf", input);
} else if (input_taichi_type->is_primitive(PrimitiveTypeID::f64)) {
llvm_val[stmt] = create_call("__nv_sqrt", input);
llvm_val[stmt] = call("__nv_sqrt", input);
} else {
TI_NOT_IMPLEMENTED
}
} else if (op == UnaryOpType::logic_not) {
if (input_taichi_type->is_primitive(PrimitiveTypeID::i32)) {
llvm_val[stmt] = create_call("logic_not_i32", input);
llvm_val[stmt] = call("logic_not_i32", input);
} else {
TI_NOT_IMPLEMENTED
}
Expand Down Expand Up @@ -247,8 +247,8 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
}
TI_ASSERT(fast_reductions.at(prim_type).find(op) !=
fast_reductions.at(prim_type).end());
return create_call(fast_reductions.at(prim_type).at(op),
{llvm_val[stmt->dest], llvm_val[stmt->val]});
return call(fast_reductions.at(prim_type).at(op), llvm_val[stmt->dest],
llvm_val[stmt->val]);
}

// LLVM15 already support f16 atomic in
Expand Down Expand Up @@ -425,9 +425,8 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
auto epilogue = create_xlogue(stmt->tls_epilogue);

auto [begin, end] = get_range_for_bounds(stmt);
create_call("gpu_parallel_range_for",
{get_arg(0), begin, end, tls_prologue, body, epilogue,
tlctx->get_constant(stmt->tls_size)});
call("gpu_parallel_range_for", get_arg(0), begin, end, tls_prologue, body,
epilogue, tlctx->get_constant(stmt->tls_size));
}

void create_offload_mesh_for(OffloadedStmt *stmt) override {
Expand Down Expand Up @@ -506,10 +505,9 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {

auto tls_epilogue = create_mesh_xlogue(stmt->tls_epilogue);

create_call(
"gpu_parallel_mesh_for",
{get_arg(0), tlctx->get_constant(stmt->mesh->num_patches), tls_prologue,
body, tls_epilogue, tlctx->get_constant(stmt->tls_size)});
call("gpu_parallel_mesh_for", get_arg(0),
tlctx->get_constant(stmt->mesh->num_patches), tls_prologue, body,
tls_epilogue, tlctx->get_constant(stmt->tls_size));
}

void emit_cuda_gc(OffloadedStmt *stmt) {
Expand Down Expand Up @@ -648,9 +646,9 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
void visit(ExternalTensorShapeAlongAxisStmt *stmt) override {
const auto arg_id = stmt->arg_id;
const auto axis = stmt->axis;
llvm_val[stmt] = create_call("RuntimeContext_get_extra_args",
{get_context(), tlctx->get_constant(arg_id),
tlctx->get_constant(axis)});
llvm_val[stmt] =
call("RuntimeContext_get_extra_args", get_context(),
tlctx->get_constant(arg_id), tlctx->get_constant(axis));
}

void visit(BinaryOpStmt *stmt) override {
Expand Down Expand Up @@ -679,9 +677,9 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {

if (op == BinaryOpType::atan2) {
if (ret_type->is_primitive(PrimitiveTypeID::f32)) {
llvm_val[stmt] = create_call("__nv_atan2f", {lhs, rhs});
llvm_val[stmt] = call("__nv_atan2f", lhs, rhs);
} else if (ret_type->is_primitive(PrimitiveTypeID::f64)) {
llvm_val[stmt] = create_call("__nv_atan2", {lhs, rhs});
llvm_val[stmt] = call("__nv_atan2", lhs, rhs);
} else {
TI_P(data_type_name(ret_type));
TI_NOT_IMPLEMENTED
Expand All @@ -690,9 +688,9 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
// Note that ret_type here cannot be integral because pow with an
// integral exponent has been demoted in the demote_operations pass
if (ret_type->is_primitive(PrimitiveTypeID::f32)) {
llvm_val[stmt] = create_call("__nv_powf", {lhs, rhs});
llvm_val[stmt] = call("__nv_powf", lhs, rhs);
} else if (ret_type->is_primitive(PrimitiveTypeID::f64)) {
llvm_val[stmt] = create_call("__nv_pow", {lhs, rhs});
llvm_val[stmt] = call("__nv_pow", lhs, rhs);
} else {
TI_P(data_type_name(ret_type));
TI_NOT_IMPLEMENTED
Expand Down
12 changes: 5 additions & 7 deletions taichi/codegen/dx12/codegen_dx12.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,8 @@ class TaskCodeGenLLVMDX12 : public TaskCodeGenLLVM {
auto epilogue = create_xlogue(stmt->tls_epilogue);

auto [begin, end] = get_range_for_bounds(stmt);
create_call("gpu_parallel_range_for",
{get_arg(0), begin, end, tls_prologue, body, epilogue,
tlctx->get_constant(stmt->tls_size)});
call("gpu_parallel_range_for", get_arg(0), begin, end, tls_prologue, body,
epilogue, tlctx->get_constant(stmt->tls_size));
}

void create_offload_mesh_for(OffloadedStmt *stmt) override {
Expand Down Expand Up @@ -131,10 +130,9 @@ class TaskCodeGenLLVMDX12 : public TaskCodeGenLLVM {

auto tls_epilogue = create_mesh_xlogue(stmt->tls_epilogue);

create_call(
"gpu_parallel_mesh_for",
{get_arg(0), tlctx->get_constant(stmt->mesh->num_patches), tls_prologue,
body, tls_epilogue, tlctx->get_constant(stmt->tls_size)});
call("gpu_parallel_mesh_for", get_arg(0),
tlctx->get_constant(stmt->mesh->num_patches), tls_prologue, body,
tls_epilogue, tlctx->get_constant(stmt->tls_size));
}

void create_bls_buffer(OffloadedStmt *stmt) {
Expand Down
Loading

0 comments on commit 9d65cbe

Please sign in to comment.