[dx12] Add ti.dx12. (#6174)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
python3kgae and pre-commit-ci[bot] authored Oct 8, 2022
1 parent ed12813 commit 42d4087
Showing 8 changed files with 72 additions and 24 deletions.
20 changes: 13 additions & 7 deletions python/taichi/lang/misc.py
@@ -149,9 +149,14 @@
"""
# ----------------------

gpu = [cuda, metal, vulkan, opengl, dx11]
dx12 = _ti_core.dx12
"""The DX11 backend.
"""
# ----------------------

gpu = [cuda, metal, vulkan, opengl, dx11, dx12]
"""A list of GPU backends supported on the current system.
Currently contains 'cuda', 'metal', 'opengl', 'vulkan', 'dx11'.
Currently contains 'cuda', 'metal', 'opengl', 'vulkan', 'dx11', 'dx12'.
When this is used, Taichi automatically picks the matching GPU backend. If no
GPU is detected, Taichi falls back to the CPU backend.
@@ -726,6 +731,7 @@ def is_arch_supported(arch, use_gles=False):
cc: _ti_core.with_cc,
vulkan: _ti_core.with_vulkan,
dx11: _ti_core.with_dx11,
dx12: _ti_core.with_dx12,
wasm: lambda: True,
cpu: lambda: True,
}
@@ -765,9 +771,9 @@ def get_compute_stream_device_time_elapsed_us() -> float:

__all__ = [
'i', 'ij', 'ijk', 'ijkl', 'ijl', 'ik', 'ikl', 'il', 'j', 'jk', 'jkl', 'jl',
'k', 'kl', 'l', 'x86_64', 'x64', 'dx11', 'wasm', 'arm64', 'cc', 'cpu',
'cuda', 'gpu', 'metal', 'opengl', 'vulkan', 'extension', 'loop_config',
'global_thread_idx', 'assume_in_range', 'block_local', 'cache_read_only',
'init', 'mesh_local', 'no_activate', 'reset', 'mesh_patch_idx',
'get_compute_stream_device_time_elapsed_us'
'k', 'kl', 'l', 'x86_64', 'x64', 'dx11', 'dx12', 'wasm', 'arm64', 'cc',
'cpu', 'cuda', 'gpu', 'metal', 'opengl', 'vulkan', 'extension',
'loop_config', 'global_thread_idx', 'assume_in_range', 'block_local',
'cache_read_only', 'init', 'mesh_local', 'no_activate', 'reset',
'mesh_patch_idx', 'get_compute_stream_device_time_elapsed_us'
]
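
The misc.py change above is the user-facing piece: ti.dx12 becomes a selectable arch and joins the gpu list. A minimal usage sketch, assuming a Taichi build compiled with TI_WITH_DX12 on a machine with a DirectX 12 capable GPU; at this commit the backend is still being wired up (see the FIXMEs below), and ti.init falls back with a warning when the requested arch is unavailable:

    import taichi as ti

    # Ask for the new DX12 backend explicitly; passing ti.gpu instead lets
    # Taichi pick any available backend from the gpu list shown above.
    ti.init(arch=ti.dx12)

    x = ti.field(ti.f32, shape=8)

    @ti.kernel
    def double_index():
        for i in x:
            x[i] = 2.0 * i

    double_index()
    print(x.to_numpy())
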
9 changes: 9 additions & 0 deletions taichi/codegen/codegen.cpp
@@ -12,6 +12,9 @@
#if defined(TI_WITH_CUDA)
#include "taichi/codegen/cuda/codegen_cuda.h"
#endif
#if defined(TI_WITH_DX12)
#include "taichi/codegen/dx12/codegen_dx12.h"
#endif
#include "taichi/system/timer.h"
#include "taichi/ir/analysis.h"
#include "taichi/ir/transforms.h"
@@ -47,6 +50,12 @@ std::unique_ptr<KernelCodeGen> KernelCodeGen::create(Arch arch,
return std::make_unique<KernelCodeGenCUDA>(kernel, stmt);
#else
TI_NOT_IMPLEMENTED
#endif
} else if (arch == Arch::dx12) {
#if defined(TI_WITH_DX12)
return std::make_unique<KernelCodeGenDX12>(kernel, stmt);
#else
TI_NOT_IMPLEMENTED
#endif
} else {
TI_NOT_IMPLEMENTED
9 changes: 9 additions & 0 deletions taichi/python/export_misc.cpp
@@ -32,6 +32,10 @@
#include "taichi/rhi/opengl/opengl_api.h"
#endif

#ifdef TI_WITH_DX12
#include "taichi/rhi/dx12/dx12_api.h"
#endif

#ifdef TI_WITH_CC
namespace taichi::lang::cccp {
extern bool is_c_backend_available();
@@ -163,6 +167,11 @@ void export_misc(py::module &m) {
#else
m.def("with_dx11", []() { return false; });
#endif
#ifdef TI_WITH_DX12
m.def("with_dx12", taichi::lang::directx12::is_dx12_api_available);
#else
m.def("with_dx12", []() { return false; });
#endif

#ifdef TI_WITH_CC
m.def("with_cc", taichi::lang::cccp::is_c_backend_available);
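
export_misc.cpp now exposes with_dx12 to Python alongside the other backend probes, which is what is_arch_supported in misc.py consults. A small sketch of probing it directly; the taichi._lib import path is the internal location of _ti_core in current wheels and is an assumption here, not part of this diff:

    import taichi as ti
    from taichi._lib import core as _ti_core  # assumed internal module path

    # with_dx12() returns False when the wheel was built without TI_WITH_DX12
    # or when no DirectX 12 runtime is available on the machine.
    arch = ti.dx12 if _ti_core.with_dx12() else ti.cpu
    ti.init(arch=arch)
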
2 changes: 1 addition & 1 deletion taichi/runtime/llvm/llvm_runtime_executor.cpp
@@ -120,7 +120,7 @@ LlvmRuntimeExecutor::LlvmRuntimeExecutor(CompileConfig &config,
llvm_context_device_ =
std::make_unique<TaichiLLVMContext>(config_, Arch::dx12);
// FIXME: add dx12 JIT.
// llvm_context_device_->init_runtime_jit_module();
llvm_context_device_->init_runtime_jit_module();
}
#endif

17 changes: 17 additions & 0 deletions tests/cpp/aot/llvm/kernel_aot_test.cpp
@@ -5,6 +5,7 @@
#include "taichi/system/memory_pool.h"
#include "taichi/runtime/cpu/aot_module_loader_impl.h"
#include "taichi/runtime/cuda/aot_module_loader_impl.h"
#include "taichi/runtime/dx12/aot_module_loader_impl.h"
#include "taichi/rhi/cuda/cuda_driver.h"
#include "taichi/platform/cuda/detect_cuda.h"

@@ -101,4 +102,20 @@ TEST(LlvmAotTest, CudaKernel) {
}
}

#ifdef TI_WITH_DX12
TEST(LlvmAotTest, DX12Kernel) {
directx12::AotModuleParams aot_params;
const auto folder_dir = getenv("TAICHI_AOT_FOLDER_PATH");

std::stringstream aot_mod_ss;
aot_mod_ss << folder_dir;
aot_params.module_path = aot_mod_ss.str();
// FIXME: add executor.
auto mod = directx12::make_aot_module(aot_params, Arch::dx12);
auto *k_run = mod->get_kernel("run");
EXPECT_TRUE(k_run);
// FIXME: launch the kernel and check result.
}
#endif

} // namespace taichi::lang
2 changes: 2 additions & 0 deletions tests/cpp/aot/python_scripts/kernel_aot_test1.py
@@ -38,5 +38,7 @@ def run(base: int, arr: ti.types.ndarray()):
compile_kernel_aot_test1(arch=ti.vulkan)
elif args.arch == "opengl":
compile_kernel_aot_test1(arch=ti.opengl)
elif args.arch == "dx12":
compile_kernel_aot_test1(arch=ti.dx12)
else:
assert False
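
The new --arch=dx12 branch reuses the same compile_kernel_aot_test1 helper that the other backends call; together with the test_config.json entry below, it produces the AOT module that LlvmAotTest.DX12Kernel loads from TAICHI_AOT_FOLDER_PATH. A rough sketch of what such a helper plausibly does for the run(base, arr) kernel named in the hunk header (the body and the exact ti.aot calls are inferred, not copied from the repository):

    import os
    import taichi as ti

    def compile_kernel_aot_test1(arch):
        ti.init(arch=arch)

        @ti.kernel
        def run(base: int, arr: ti.types.ndarray()):
            for i in arr:
                arr[i] = base + i

        arr = ti.ndarray(ti.i32, shape=16)

        # Serialize the kernel so the C++ loader in kernel_aot_test.cpp can
        # fetch it by name ("run") at runtime.
        mod = ti.aot.Module(arch)
        mod.add_kernel(run, template_args={'arr': arr})
        # Older ti.aot versions also took a module-name argument to save().
        mod.save(os.environ['TAICHI_AOT_FOLDER_PATH'])
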
33 changes: 17 additions & 16 deletions tests/python/test_api.py
@@ -72,22 +72,23 @@ def _get_expected_matrix_apis():
'atomic_sub', 'atomic_xor', 'axes', 'bit_cast', 'bit_shr', 'block_local',
'cache_read_only', 'cast', 'cc', 'ceil', 'cos', 'cpu', 'cuda',
'data_oriented', 'dataclass', 'deactivate', 'deactivate_all_snodes',
'dx11', 'eig', 'exp', 'experimental', 'extension', 'f16', 'f32', 'f64',
'field', 'float16', 'float32', 'float64', 'floor', 'func', 'get_addr',
'get_compute_stream_device_time_elapsed_us', 'global_thread_idx', 'gpu',
'graph', 'grouped', 'hex_to_rgb', 'i', 'i16', 'i32', 'i64', 'i8', 'ij',
'ijk', 'ijkl', 'ijl', 'ik', 'ikl', 'il', 'init', 'int16', 'int32', 'int64',
'int8', 'is_active', 'is_logging_effective', 'j', 'jk', 'jkl', 'jl', 'k',
'kernel', 'kl', 'l', 'lang', 'length', 'linalg', 'log', 'loop_config',
'math', 'max', 'mesh_local', 'mesh_patch_idx', 'metal', 'min', 'ndarray',
'ndrange', 'no_activate', 'one', 'opengl', 'polar_decompose', 'pow',
'profiler', 'randn', 'random', 'raw_div', 'raw_mod', 'ref',
'rescale_index', 'reset', 'rgb_to_hex', 'root', 'round', 'rsqrt', 'select',
'set_logging_level', 'simt', 'sin', 'solve', 'sparse_matrix_builder',
'sqrt', 'static', 'static_assert', 'static_print', 'stop_grad', 'svd',
'swizzle_generator', 'sym_eig', 'sync', 'tan', 'tanh', 'template', 'tools',
'types', 'u16', 'u32', 'u64', 'u8', 'ui', 'uint16', 'uint32', 'uint64',
'uint8', 'vulkan', 'wasm', 'x64', 'x86_64', 'zero'
'dx11', 'dx12', 'eig', 'exp', 'experimental', 'extension', 'f16', 'f32',
'f64', 'field', 'float16', 'float32', 'float64', 'floor', 'func',
'get_addr', 'get_compute_stream_device_time_elapsed_us',
'global_thread_idx', 'gpu', 'graph', 'grouped', 'hex_to_rgb', 'i', 'i16',
'i32', 'i64', 'i8', 'ij', 'ijk', 'ijkl', 'ijl', 'ik', 'ikl', 'il', 'init',
'int16', 'int32', 'int64', 'int8', 'is_active', 'is_logging_effective',
'j', 'jk', 'jkl', 'jl', 'k', 'kernel', 'kl', 'l', 'lang', 'length',
'linalg', 'log', 'loop_config', 'math', 'max', 'mesh_local',
'mesh_patch_idx', 'metal', 'min', 'ndarray', 'ndrange', 'no_activate',
'one', 'opengl', 'polar_decompose', 'pow', 'profiler', 'randn', 'random',
'raw_div', 'raw_mod', 'ref', 'rescale_index', 'reset', 'rgb_to_hex',
'root', 'round', 'rsqrt', 'select', 'set_logging_level', 'simt', 'sin',
'solve', 'sparse_matrix_builder', 'sqrt', 'static', 'static_assert',
'static_print', 'stop_grad', 'svd', 'swizzle_generator', 'sym_eig', 'sync',
'tan', 'tanh', 'template', 'tools', 'types', 'u16', 'u32', 'u64', 'u8',
'ui', 'uint16', 'uint32', 'uint64', 'uint8', 'vulkan', 'wasm', 'x64',
'x86_64', 'zero'
]
user_api[ti.ad] = [
'FwdMode', 'Tape', 'clear_all_gradients', 'grad_for', 'grad_replaced',
4 changes: 4 additions & 0 deletions tests/test_config.json
@@ -8,6 +8,10 @@
["cpp", "aot", "python_scripts", "kernel_aot_test1.py"],
"--arch=cuda"
],
"LlvmAotTest.DX12Kernel": [
["cpp", "aot", "python_scripts", "kernel_aot_test1.py"],
"--arch=dx12"
],
"LlvmAotTest.CpuField": [
["cpp", "aot", "python_scripts", "field_aot_test.py"],
"--arch=cpu"
