Skip to content

Commit

Permalink
[llvm] [refactor] (Decomp of #5251 6/n) Let ModuleToFunctionConverter…
Browse files Browse the repository at this point in the history
… support multiple modules (#5372)

* [llvm] [refactor] (Decomp of #5251 6/n) Let ModuleToFunctionConverter support multiple modules

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update codegen_cuda.cpp

* Update codegen_cuda.h

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
lin-hitonami and pre-commit-ci[bot] authored Jul 9, 2022
1 parent 332298b commit 8292973
Show file tree
Hide file tree
Showing 10 changed files with 136 additions and 115 deletions.
14 changes: 14 additions & 0 deletions taichi/codegen/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,19 @@ std::unique_ptr<KernelCodeGen> KernelCodeGen::create(Arch arch,
TI_ERROR("Llvm disabled");
#endif
}
#ifdef TI_WITH_LLVM

// Binds the converter to the LLVM context used to JIT the modules and to the
// runtime executor that owns device/ndarray allocations. Both pointers are
// borrowed, not owned.
ModuleToFunctionConverter::ModuleToFunctionConverter(TaichiLLVMContext *tlctx,
                                                     LlvmRuntimeExecutor *executor)
    : tlctx_(tlctx), executor_(executor) {}

// Convenience overload: derives the kernel's name and launch-argument
// metadata from |kernel| and forwards to the string-based overload.
FunctionType ModuleToFunctionConverter::convert(
    const Kernel *kernel,
    std::vector<LLVMCompiledData> &&data) const {
  const auto &name = kernel->name;
  const auto arg_infos = infer_launch_args(kernel);
  return convert(name, arg_infos, std::move(data));
}

#endif
TLANG_NAMESPACE_END
23 changes: 23 additions & 0 deletions taichi/codegen/codegen.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Driver class for kernel code generators.

#pragma once
#include <taichi/runtime/llvm/llvm_runtime_executor.h>
#include "taichi/ir/ir.h"
#include "taichi/program/program.h"
#ifdef TI_WITH_LLVM
Expand Down Expand Up @@ -36,4 +37,26 @@ class KernelCodeGen {
#endif
};

#ifdef TI_WITH_LLVM

// Converts compiled LLVM modules into a host-callable kernel launcher
// (FunctionType). Each backend (CPU/CUDA/...) provides a subclass that knows
// how to load the modules and resolve the offloaded task entry points.
class ModuleToFunctionConverter {
 public:
  // |tlctx| and |executor| are borrowed, not owned; they must outlive this
  // converter and any launcher it produces.
  // (Renamed the second parameter from `program` to `executor` for
  // consistency with the definition and the `executor_` member.)
  explicit ModuleToFunctionConverter(TaichiLLVMContext *tlctx,
                                     LlvmRuntimeExecutor *executor);

  virtual ~ModuleToFunctionConverter() = default;

  // Builds a launcher for kernel |kernel_name|. |args| describes the launch
  // arguments (used to translate ndarray arguments at call time); |data|
  // holds one compiled module plus its offloaded tasks per compilation unit
  // and is consumed by this call.
  virtual FunctionType convert(const std::string &kernel_name,
                               const std::vector<LlvmLaunchArgInfo> &args,
                               std::vector<LLVMCompiledData> &&data) const = 0;

  // Convenience overload: infers the name and argument info from |kernel|.
  virtual FunctionType convert(const Kernel *kernel,
                               std::vector<LLVMCompiledData> &&data) const;

 protected:
  TaichiLLVMContext *tlctx_{nullptr};
  LlvmRuntimeExecutor *executor_{nullptr};
};

#endif
TLANG_NAMESPACE_END
55 changes: 55 additions & 0 deletions taichi/codegen/cpu/codegen_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,16 @@ class CodeGenLLVMCPU : public CodeGenLLVM {
TI_NOT_IMPLEMENTED
}
}

// Compiles this kernel's IR, then wraps the single compiled module into a
// host-side launcher via the CPU converter.
FunctionType gen() override {
  auto compiled = run_compilation();

  std::vector<LLVMCompiledData> units;
  units.emplace_back(std::move(compiled));

  CPUModuleToFunctionConverter converter{
      tlctx, get_llvm_program(prog)->get_runtime_executor()};
  return converter.convert(kernel, std::move(units));
}
};

} // namespace
Expand All @@ -231,6 +241,51 @@ std::unique_ptr<CodeGenLLVM> CodeGenCPU::make_codegen_llvm(Kernel *kernel,
IRNode *ir) {
return std::make_unique<CodeGenLLVMCPU>(kernel, ir);
}

// Turns the compiled LLVM modules into a callable CPU kernel launcher.
//
// All modules are first handed to the LLVM context; the entry function of
// every offloaded task is then resolved to a raw function pointer. The
// returned closure rewrites ndarray arguments from DeviceAllocation handles
// to raw host pointers and invokes the task functions in order.
FunctionType CPUModuleToFunctionConverter::convert(
    const std::string &kernel_name,
    const std::vector<LlvmLaunchArgInfo> &args,
    std::vector<LLVMCompiledData> &&data) const {
  for (auto &datum : data) {
    tlctx_->add_module(std::move(datum.module));
  }

  using TaskFunc = int32 (*)(void *);
  std::vector<TaskFunc> task_funcs;
  task_funcs.reserve(data.size());
  for (auto &datum : data) {
    for (auto &task : datum.tasks) {
      auto *func_ptr = tlctx_->lookup_function_pointer(task.name);
      // Fixed message: what is looked up is the offloaded *task* function
      // ("datum" here was an over-eager rename of the loop variable).
      TI_ASSERT_INFO(func_ptr, "Offloaded task function {} not found",
                     task.name);
      task_funcs.push_back((TaskFunc)(func_ptr));
    }
  }
  // Do NOT capture `this`: the launcher may outlive the converter, so it
  // captures only the (longer-lived) executor and plain value copies.
  return [executor = this->executor_, args, kernel_name,
          task_funcs](RuntimeContext &context) {
    TI_TRACE("Launching kernel {}", kernel_name);
    // For taichi ndarrays, context.args saves pointer to its
    // |DeviceAllocation|; the CPU backend wants the raw host pointer, so
    // rewrite those arguments in place before launching.
    for (int i = 0; i < (int)args.size(); i++) {
      if (args[i].is_array &&
          context.device_allocation_type[i] !=
              RuntimeContext::DevAllocType::kNone &&
          context.array_runtime_sizes[i] > 0) {
        DeviceAllocation *ptr =
            static_cast<DeviceAllocation *>(context.get_arg<void *>(i));
        uint64 host_ptr = (uint64)executor->get_ndarray_alloc_info_ptr(*ptr);
        context.set_arg(i, host_ptr);
        context.set_array_device_allocation_type(
            i, RuntimeContext::DevAllocType::kNone);
      }
    }
    for (auto task : task_funcs) {
      task(&context);
    }
  };
}

#endif // TI_WITH_LLVM

FunctionType CodeGenCPU::codegen() {
Expand Down
18 changes: 18 additions & 0 deletions taichi/codegen/cpu/codegen_cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,22 @@ class CodeGenCPU : public KernelCodeGen {
FunctionType codegen() override;
};

#ifdef TI_WITH_LLVM

// CPU implementation of ModuleToFunctionConverter: hands the compiled LLVM
// modules to the TaichiLLVMContext and returns a launcher that calls each
// offloaded task's entry function directly on the host (see codegen_cpu.cpp).
class CPUModuleToFunctionConverter : public ModuleToFunctionConverter {
public:
// Both pointers are borrowed; they must outlive the converter and any
// launcher it returns.
explicit CPUModuleToFunctionConverter(TaichiLLVMContext *tlctx,
LlvmRuntimeExecutor *executor)
: ModuleToFunctionConverter(tlctx, executor) {
}

// Re-expose the base-class Kernel* convenience overload, which would
// otherwise be hidden by the override below.
using ModuleToFunctionConverter::convert;

// Loads |data|'s modules and returns a function launching kernel
// |kernel_name| with argument metadata |args|.
FunctionType convert(const std::string &kernel_name,
const std::vector<LlvmLaunchArgInfo> &args,
std::vector<LLVMCompiledData> &&data) const override;
};

#endif

TLANG_NAMESPACE_END
19 changes: 6 additions & 13 deletions taichi/codegen/cuda/codegen_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ class CodeGenLLVMCUDA : public CodeGenLLVM {
auto *llvm_prog = get_llvm_program(kernel->program);
CUDAModuleToFunctionConverter converter{tlctx,
llvm_prog->get_runtime_executor()};

return converter.convert(this->kernel, std::move(compiled_res.module),
std::move(compiled_res.tasks));
std::vector<LLVMCompiledData> data;
data.push_back(std::move(compiled_res));
return converter.convert(this->kernel, std::move(data));
}

llvm::Value *create_print(std::string tag,
Expand Down Expand Up @@ -770,8 +770,9 @@ FunctionType CodeGenCUDA::codegen() {
FunctionType CUDAModuleToFunctionConverter::convert(
const std::string &kernel_name,
const std::vector<LlvmLaunchArgInfo> &args,
std::unique_ptr<llvm::Module> mod,
std::vector<OffloadedTask> &&tasks) const {
std::vector<LLVMCompiledData> &&data) const {
auto &mod = data[0].module;
auto &tasks = data[0].tasks;
#ifdef TI_WITH_CUDA
for (const auto &task : tasks) {
llvm::Function *func = mod->getFunction(task.name);
Expand Down Expand Up @@ -876,12 +877,4 @@ FunctionType CUDAModuleToFunctionConverter::convert(
#endif // TI_WITH_CUDA
}

FunctionType CUDAModuleToFunctionConverter::convert(
const Kernel *kernel,
std::unique_ptr<llvm::Module> mod,
std::vector<OffloadedTask> &&tasks) const {
return convert(kernel->name, infer_launch_args(kernel), std::move(mod),
std::move(tasks));
}

TLANG_NAMESPACE_END
8 changes: 2 additions & 6 deletions taichi/codegen/cuda/codegen_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,11 @@ class CUDAModuleToFunctionConverter : public ModuleToFunctionConverter {
LlvmRuntimeExecutor *executor)
: ModuleToFunctionConverter(tlctx, executor) {
}
using ModuleToFunctionConverter::convert;

FunctionType convert(const std::string &kernel_name,
const std::vector<LlvmLaunchArgInfo> &args,
std::unique_ptr<llvm::Module> mod,
std::vector<OffloadedTask> &&tasks) const override;

FunctionType convert(const Kernel *kernel,
std::unique_ptr<llvm::Module> mod,
std::vector<OffloadedTask> &&tasks) const override;
std::vector<LLVMCompiledData> &&data) const override;
};

TLANG_NAMESPACE_END
63 changes: 0 additions & 63 deletions taichi/codegen/llvm/codegen_llvm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2589,15 +2589,6 @@ bool CodeGenLLVM::maybe_read_compilation_from_cache(
return true;
}

FunctionType CodeGenLLVM::gen() {
auto compiled_res = run_compilation();

ModuleToFunctionConverter converter{
tlctx, get_llvm_program(prog)->get_runtime_executor()};
return converter.convert(kernel, std::move(compiled_res.module),
std::move(compiled_res.tasks));
}

llvm::Value *CodeGenLLVM::create_xlogue(std::unique_ptr<Block> &block) {
llvm::Value *xlogue;

Expand Down Expand Up @@ -2676,60 +2667,6 @@ void CodeGenLLVM::cache_module(const std::string &kernel_key) {
std::move(offloaded_task_list));
}

ModuleToFunctionConverter::ModuleToFunctionConverter(
TaichiLLVMContext *tlctx,
LlvmRuntimeExecutor *executor)
: tlctx_(tlctx), executor_(executor) {
}

FunctionType ModuleToFunctionConverter::convert(
const std::string &kernel_name,
const std::vector<LlvmLaunchArgInfo> &args,
std::unique_ptr<llvm::Module> mod,
std::vector<OffloadedTask> &&tasks) const {
tlctx_->add_module(std::move(mod));

using TaskFunc = int32 (*)(void *);
std::vector<TaskFunc> task_funcs;
task_funcs.reserve(tasks.size());
for (auto &task : tasks) {
auto *func_ptr = tlctx_->lookup_function_pointer(task.name);
TI_ASSERT_INFO(func_ptr, "Offloaded task function {} not found", task.name);
task_funcs.push_back((TaskFunc)(func_ptr));
}
// Do NOT capture `this`...
return [executor = this->executor_, args, kernel_name,
task_funcs](RuntimeContext &context) {
TI_TRACE("Launching kernel {}", kernel_name);
// For taichi ndarrays, context.args saves pointer to its
// |DeviceAllocation|, CPU backend actually want to use the raw ptr here.
for (int i = 0; i < (int)args.size(); i++) {
if (args[i].is_array &&
context.device_allocation_type[i] !=
RuntimeContext::DevAllocType::kNone &&
context.array_runtime_sizes[i] > 0) {
DeviceAllocation *ptr =
static_cast<DeviceAllocation *>(context.get_arg<void *>(i));
uint64 host_ptr = (uint64)executor->get_ndarray_alloc_info_ptr(*ptr);
context.set_arg(i, host_ptr);
context.set_array_device_allocation_type(
i, RuntimeContext::DevAllocType::kNone);
}
}
for (auto task : task_funcs) {
task(&context);
}
};
}

FunctionType ModuleToFunctionConverter::convert(
const Kernel *kernel,
std::unique_ptr<llvm::Module> mod,
std::vector<OffloadedTask> &&tasks) const {
return convert(kernel->name, infer_launch_args(kernel), std::move(mod),
std::move(tasks));
}

TLANG_NAMESPACE_END

#endif // #ifdef TI_WITH_LLVM
34 changes: 8 additions & 26 deletions taichi/codegen/llvm/codegen_llvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ class FunctionCreationGuard {
// The result of compiling one LLVM module: the module itself plus the
// offloaded tasks whose entry functions live in it. Move-only (it uniquely
// owns the llvm::Module; declaring the move constructor suppresses copies).
struct LLVMCompiledData {
  std::vector<OffloadedTask> tasks;
  std::unique_ptr<llvm::Module> module{nullptr};
  LLVMCompiledData() = default;
  LLVMCompiledData(LLVMCompiledData &&) = default;
  // Rule of five: a user-declared move constructor suppresses the implicit
  // move-assignment operator, which would make e.g. reassigning elements of
  // a std::vector<LLVMCompiledData> ill-formed. Default it explicitly.
  LLVMCompiledData &operator=(LLVMCompiledData &&) = default;
  LLVMCompiledData(std::vector<OffloadedTask> tasks,
                   std::unique_ptr<llvm::Module> module)
      : tasks(std::move(tasks)), module(std::move(module)) {
  }
  // NOTE(review): |module| is intentionally excluded from serialization —
  // presumably persisted through a separate cache path; confirm.
  TI_IO_DEF(tasks);
};

class CodeGenLLVM : public IRVisitor, public LLVMModuleBuilder {
Expand Down Expand Up @@ -134,7 +141,7 @@ class CodeGenLLVM : public IRVisitor, public LLVMModuleBuilder {
LLVMCompiledData run_compilation();

// TODO: This function relies largely on `run_compilation()`. Name it better.
virtual FunctionType gen();
virtual FunctionType gen(){TI_NOT_IMPLEMENTED};

virtual bool supports_offline_cache() const {
return false;
Expand Down Expand Up @@ -413,31 +420,6 @@ class CodeGenLLVM : public IRVisitor, public LLVMModuleBuilder {
void cache_module(const std::string &kernel_key);
};

class LlvmRuntimeExecutor;

// TODO: Make ModuleToFunctionConverter abstract,
// Move CPU implementation to "taichi/backend/cpu/"
class ModuleToFunctionConverter {
public:
explicit ModuleToFunctionConverter(TaichiLLVMContext *tlctx,
LlvmRuntimeExecutor *executor);

virtual ~ModuleToFunctionConverter() = default;

virtual FunctionType convert(const std::string &kernel_name,
const std::vector<LlvmLaunchArgInfo> &args,
std::unique_ptr<llvm::Module> mod,
std::vector<OffloadedTask> &&tasks) const;

virtual FunctionType convert(const Kernel *kernel,
std::unique_ptr<llvm::Module> mod,
std::vector<OffloadedTask> &&tasks) const;

protected:
TaichiLLVMContext *tlctx_{nullptr};
LlvmRuntimeExecutor *executor_{nullptr};
};

} // namespace lang
} // namespace taichi

Expand Down
11 changes: 6 additions & 5 deletions taichi/runtime/cpu/aot_module_loader_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include "taichi/runtime/llvm/llvm_offline_cache.h"
#include "taichi/runtime/llvm/llvm_runtime_executor.h"
#include "taichi/codegen/llvm/codegen_llvm.h"
#include "taichi/codegen/cpu/codegen_cpu.h"

namespace taichi {
namespace lang {
Expand All @@ -23,10 +23,11 @@ class AotModuleImpl : public LlvmAotModule {
TI_ASSERT(arch == Arch::x64 || arch == Arch::arm64);
auto *tlctx = executor_->get_llvm_context(arch);

ModuleToFunctionConverter converter{tlctx, executor_};

return converter.convert(name, loaded.args, std::move(loaded.owned_module),
std::move(loaded.offloaded_task_list));
CPUModuleToFunctionConverter converter{tlctx, executor_};
std::vector<LLVMCompiledData> data;
data.emplace_back(std::move(loaded.offloaded_task_list),
std::move(loaded.owned_module));
return converter.convert(name, loaded.args, std::move(data));
}

std::unique_ptr<aot::KernelTemplate> make_new_kernel_template(
Expand Down
6 changes: 4 additions & 2 deletions taichi/runtime/cuda/aot_module_loader_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@ class AotModuleImpl : public LlvmAotModule {

CUDAModuleToFunctionConverter converter{tlctx, executor_};

return converter.convert(name, loaded.args, std::move(loaded.owned_module),
std::move(loaded.offloaded_task_list));
std::vector<LLVMCompiledData> data;
data.emplace_back(std::move(loaded.offloaded_task_list),
std::move(loaded.owned_module));
return converter.convert(name, loaded.args, std::move(data));
}

std::unique_ptr<aot::KernelTemplate> make_new_kernel_template(
Expand Down

0 comments on commit 8292973

Please sign in to comment.