[llvm] Cache llvm runtime bitcode to reduce start up time in dev mode (…

…#1144) * [llvm] cache llvm runtime bitcode to reduce start up time in dev mode * add TI_CACHE_RUNTIME_BITCODE env cfg * [skip ci] revert * [skip ci] enforce code format * [skip ci] Apply suggestions from code review Co-authored-by: Yuanming Hu <yuanming-hu@users.noreply.github.com> Co-authored-by: Taichi Gardener <taichigardener@gmail.com> Co-authored-by: Yuanming Hu <yuanming-hu@users.noreply.github.com>
taichi-dev · Jun 6, 2020 · a345d5b · a345d5b
1 parent e1c0336
commit a345d5b
Show file tree

Hide file tree

Showing 2 changed files with 64 additions and 16 deletions.
diff --git a/docs/global_settings.rst b/docs/global_settings.rst
@@ -1,14 +1,35 @@
 Global settings
 ---------------
 
-- Disable advanced optimization to save compile time: ``ti.core.toggle_advanced_optimization(False)``
-- Restart the Taichi runtime system (clear memory, destroy all variables and kernels): ``ti.reset()``
-- Eliminate verbose outputs: ``ti.get_runtime().set_verbose(False)``
-- To not trigger GDB when crashes: ``export TI_GDB_TRIGGER=0``
-- To not use unified memory for CUDA: ``export TI_USE_UNIFIED_MEMORY=0``
-- To specify pre-allocated memory size for CUDA: ``export TI_DEVICE_MEMORY_GB=0.5``
-- Show more detailed log (TI_TRACE): ``export TI_LOG_LEVEL=trace``
-- To specify which GPU to use for CUDA: ``export CUDA_VISIBLE_DEVICES=[gpuid]``
-- To specify which Arch to use: ``export TI_ARCH=cuda``
-- To print intermediate IR generated: ``export TI_PRINT_IR=1``
-- To print verbose details: ``export TI_VERBOSE=1``
+Backends
+********
+
+- To specify which Arch to use: ``export TI_ARCH=cuda`` or ``ti.init(arch=ti.cuda)``.
+- To specify which GPU to use for CUDA: ``export CUDA_VISIBLE_DEVICES=[gpuid]``.
+- To specify pre-allocated memory size for CUDA: ``export TI_DEVICE_MEMORY_GB=0.5`` or ``ti.init(device_memory_GB=0.5)``.
+- To disable unified memory usage on CUDA: ``export TI_USE_UNIFIED_MEMORY=0``.
+
+Compilation
+***********
+
+- Disable advanced optimization to save compile time and avoid possible errors: ``ti.core.toggle_advanced_optimization(False)``.
+- To print intermediate IR generated: ``export TI_PRINT_IR=1`` or ``ti.init(print_ir=True)``.
+- To print preprocessed Python code: ``export TI_PRINT_PREPROCESSED=1`` or ``ti.init(print_preprocessed=True)``.
+
+Runtime
+*******
+
+- Restart the entire Taichi system (destroy all tensors and kernels): ``ti.reset()``.
+- To start program in debug mode: ``export TI_DEBUG=1`` or ``ti.init(debug=True)``.
+
+Logging
+*******
+
+- Show more detailed log to level TRACE: ``export TI_LOG_LEVEL=trace`` or ``ti.set_logging_level(ti.TRACE)``.
+- Eliminate verbose outputs: ``ti.get_runtime().set_verbose(False)`` or ``export TI_VERBOSE=0``.
+
+Develop
+*******
+
+- Cache compiled runtime bitcode in **dev mode** to save start up time: ``export TI_CACHE_RUNTIME_BITCODE=1``.
+- To trigger GDB when Taichi crashes: ``export TI_GDB_TRIGGER=1``.
diff --git a/taichi/llvm/llvm_context.cpp b/taichi/llvm/llvm_context.cpp
@@ -37,6 +37,8 @@
 #include "taichi/lang_util.h"
 #include "taichi/jit/jit_session.h"
 #include "taichi/common/task.h"
+#include "taichi/util/environ_config.h"
+#include <filesystem>
 
 TLANG_NAMESPACE_BEGIN
 
@@ -130,14 +132,32 @@ void compile_runtime_bitcode(Arch arch) {
   if (is_release())
     return;
   TI_AUTO_PROF;
+  bool do_cache = get_environ_config("TI_CACHE_RUNTIME_BITCODE", 0);
   static std::set<int> runtime_compiled;
   if (runtime_compiled.find((int)arch) == runtime_compiled.end()) {
+    auto runtime_src_folder = get_runtime_src_dir();
+    auto runtime_folder = get_runtime_dir();
+    auto fn_bc = get_runtime_fn(arch);
+    auto src_runtime_bc = fmt::format("{}{}", runtime_src_folder, fn_bc);
+    auto dst_runtime_bc = fmt::format("{}{}", runtime_folder, fn_bc);
+    namespace fs = std::filesystem;
+    if (do_cache && fs::exists(src_runtime_bc)) {
+      TI_TRACE("Restoring cached runtime module bitcode [{}]...",
+               src_runtime_bc);
+      std::error_code ec;
+      if (!fs::copy_file(src_runtime_bc, dst_runtime_bc,
+                         fs::copy_options::overwrite_existing)) {
+        TI_WARN("Failed to copy from saved runtime bitcode cache.");
+      } else {
+        TI_TRACE("Runtime module bitcode loaded.");
+        runtime_compiled.insert((int)arch);
+        return;
+      }
+    }
     auto clang =
         find_existing_command({"clang-7", "clang-8", "clang-9", "clang"});
     TI_ASSERT(command_exist("llvm-as"));
     TI_TRACE("Compiling runtime module bitcode...");
-    auto runtime_src_folder = get_runtime_src_dir();
-    auto runtime_folder = get_runtime_dir();
     std::string macro = fmt::format(" -D ARCH_{} ", arch_name(arch));
     auto cmd = fmt::format(
         "{} -S {}runtime.cpp -o {}runtime.ll -fno-exceptions "
@@ -147,11 +167,18 @@ void compile_runtime_bitcode(Arch arch) {
     if (ret) {
       TI_ERROR("LLVMRuntime compilation failed.");
     }
-    cmd = fmt::format("llvm-as {}runtime.ll -o {}{}", runtime_folder,
-                      runtime_folder, get_runtime_fn(arch));
+    cmd = fmt::format("llvm-as {}runtime.ll -o {}", runtime_folder,
+                      dst_runtime_bc);
     std::system(cmd.c_str());
-    TI_TRACE("runtime module bitcode compiled.");
+    TI_TRACE("Runtime module bitcode compiled.");
     runtime_compiled.insert((int)arch);
+    if (do_cache) {
+      TI_TRACE("Saving runtime module bitcode cache [{}]...", dst_runtime_bc);
+      if (!fs::copy_file(dst_runtime_bc, src_runtime_bc,
+                         fs::copy_options::overwrite_existing)) {
+        TI_WARN("Failed to save runtime bitcode cache.");
+      }
+    }
   }
 }