From 42d4087c4711a1a6dfb58214c266144b3e4037ba Mon Sep 17 00:00:00 2001 From: Xiang Li Date: Fri, 7 Oct 2022 19:19:38 -0700 Subject: [PATCH] [dx12] Add ti.dx12. (#6174) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- python/taichi/lang/misc.py | 20 +++++++---- taichi/codegen/codegen.cpp | 9 +++++ taichi/python/export_misc.cpp | 9 +++++ taichi/runtime/llvm/llvm_runtime_executor.cpp | 2 +- tests/cpp/aot/llvm/kernel_aot_test.cpp | 17 ++++++++++ .../aot/python_scripts/kernel_aot_test1.py | 2 ++ tests/python/test_api.py | 33 ++++++++++--------- tests/test_config.json | 4 +++ 8 files changed, 72 insertions(+), 24 deletions(-) diff --git a/python/taichi/lang/misc.py b/python/taichi/lang/misc.py index 7938c09b3fcc0..53956ce9ebc2c 100644 --- a/python/taichi/lang/misc.py +++ b/python/taichi/lang/misc.py @@ -149,9 +149,14 @@ """ # ---------------------- -gpu = [cuda, metal, vulkan, opengl, dx11] +dx12 = _ti_core.dx12 +"""The DX12 backend. +""" +# ---------------------- + +gpu = [cuda, metal, vulkan, opengl, dx11, dx12] """A list of GPU backends supported on the current system. -Currently contains 'cuda', 'metal', 'opengl', 'vulkan', 'dx11'. +Currently contains 'cuda', 'metal', 'opengl', 'vulkan', 'dx11', 'dx12'. When this is used, Taichi automatically picks the matching GPU backend. If no GPU is detected, Taichi falls back to the CPU backend. 
@@ -726,6 +731,7 @@ def is_arch_supported(arch, use_gles=False): cc: _ti_core.with_cc, vulkan: _ti_core.with_vulkan, dx11: _ti_core.with_dx11, + dx12: _ti_core.with_dx12, wasm: lambda: True, cpu: lambda: True, } @@ -765,9 +771,9 @@ def get_compute_stream_device_time_elapsed_us() -> float: __all__ = [ 'i', 'ij', 'ijk', 'ijkl', 'ijl', 'ik', 'ikl', 'il', 'j', 'jk', 'jkl', 'jl', - 'k', 'kl', 'l', 'x86_64', 'x64', 'dx11', 'wasm', 'arm64', 'cc', 'cpu', - 'cuda', 'gpu', 'metal', 'opengl', 'vulkan', 'extension', 'loop_config', - 'global_thread_idx', 'assume_in_range', 'block_local', 'cache_read_only', - 'init', 'mesh_local', 'no_activate', 'reset', 'mesh_patch_idx', - 'get_compute_stream_device_time_elapsed_us' + 'k', 'kl', 'l', 'x86_64', 'x64', 'dx11', 'dx12', 'wasm', 'arm64', 'cc', + 'cpu', 'cuda', 'gpu', 'metal', 'opengl', 'vulkan', 'extension', + 'loop_config', 'global_thread_idx', 'assume_in_range', 'block_local', + 'cache_read_only', 'init', 'mesh_local', 'no_activate', 'reset', + 'mesh_patch_idx', 'get_compute_stream_device_time_elapsed_us' ] diff --git a/taichi/codegen/codegen.cpp b/taichi/codegen/codegen.cpp index 674cdd18d21ad..7a617a3928643 100644 --- a/taichi/codegen/codegen.cpp +++ b/taichi/codegen/codegen.cpp @@ -12,6 +12,9 @@ #if defined(TI_WITH_CUDA) #include "taichi/codegen/cuda/codegen_cuda.h" #endif +#if defined(TI_WITH_DX12) +#include "taichi/codegen/dx12/codegen_dx12.h" +#endif #include "taichi/system/timer.h" #include "taichi/ir/analysis.h" #include "taichi/ir/transforms.h" @@ -47,6 +50,12 @@ std::unique_ptr<KernelCodeGen> KernelCodeGen::create(Arch arch, return std::make_unique<KernelCodeGenCUDA>(kernel, stmt); #else TI_NOT_IMPLEMENTED +#endif + } else if (arch == Arch::dx12) { +#if defined(TI_WITH_DX12) + return std::make_unique<KernelCodeGenDX12>(kernel, stmt); +#else + TI_NOT_IMPLEMENTED #endif } else { TI_NOT_IMPLEMENTED diff --git a/taichi/python/export_misc.cpp b/taichi/python/export_misc.cpp index 37ca97f06b45c..3e08a5df69b95 100644 --- a/taichi/python/export_misc.cpp +++ 
b/taichi/python/export_misc.cpp @@ -32,6 +32,10 @@ #include "taichi/rhi/opengl/opengl_api.h" #endif +#ifdef TI_WITH_DX12 +#include "taichi/rhi/dx12/dx12_api.h" +#endif + #ifdef TI_WITH_CC namespace taichi::lang::cccp { extern bool is_c_backend_available(); @@ -163,6 +167,11 @@ void export_misc(py::module &m) { #else m.def("with_dx11", []() { return false; }); #endif +#ifdef TI_WITH_DX12 + m.def("with_dx12", taichi::lang::directx12::is_dx12_api_available); +#else + m.def("with_dx12", []() { return false; }); +#endif #ifdef TI_WITH_CC m.def("with_cc", taichi::lang::cccp::is_c_backend_available); diff --git a/taichi/runtime/llvm/llvm_runtime_executor.cpp b/taichi/runtime/llvm/llvm_runtime_executor.cpp index a22a869eafd9d..6cc2a7876bc36 100644 --- a/taichi/runtime/llvm/llvm_runtime_executor.cpp +++ b/taichi/runtime/llvm/llvm_runtime_executor.cpp @@ -120,7 +120,7 @@ LlvmRuntimeExecutor::LlvmRuntimeExecutor(CompileConfig &config, llvm_context_device_ = std::make_unique<TaichiLLVMContext>(config_, Arch::dx12); // FIXME: add dx12 JIT. 
- // llvm_context_device_->init_runtime_jit_module(); + llvm_context_device_->init_runtime_jit_module(); } #endif diff --git a/tests/cpp/aot/llvm/kernel_aot_test.cpp b/tests/cpp/aot/llvm/kernel_aot_test.cpp index bb177dfd37aa9..ba026a39ab5e2 100644 --- a/tests/cpp/aot/llvm/kernel_aot_test.cpp +++ b/tests/cpp/aot/llvm/kernel_aot_test.cpp @@ -5,6 +5,7 @@ #include "taichi/system/memory_pool.h" #include "taichi/runtime/cpu/aot_module_loader_impl.h" #include "taichi/runtime/cuda/aot_module_loader_impl.h" +#include "taichi/runtime/dx12/aot_module_loader_impl.h" #include "taichi/rhi/cuda/cuda_driver.h" #include "taichi/platform/cuda/detect_cuda.h" @@ -101,4 +102,20 @@ TEST(LlvmAotTest, CudaKernel) { } } +#ifdef TI_WITH_DX12 +TEST(LlvmAotTest, DX12Kernel) { + directx12::AotModuleParams aot_params; + const auto folder_dir = getenv("TAICHI_AOT_FOLDER_PATH"); + + std::stringstream aot_mod_ss; + aot_mod_ss << folder_dir; + aot_params.module_path = aot_mod_ss.str(); + // FIXME: add executor. + auto mod = directx12::make_aot_module(aot_params, Arch::dx12); + auto *k_run = mod->get_kernel("run"); + EXPECT_TRUE(k_run); + // FIXME: launch the kernel and check result. 
+} +#endif + } // namespace taichi::lang diff --git a/tests/cpp/aot/python_scripts/kernel_aot_test1.py b/tests/cpp/aot/python_scripts/kernel_aot_test1.py index b38130802acdc..f3822708f4a78 100644 --- a/tests/cpp/aot/python_scripts/kernel_aot_test1.py +++ b/tests/cpp/aot/python_scripts/kernel_aot_test1.py @@ -38,5 +38,7 @@ def run(base: int, arr: ti.types.ndarray()): compile_kernel_aot_test1(arch=ti.vulkan) elif args.arch == "opengl": compile_kernel_aot_test1(arch=ti.opengl) + elif args.arch == "dx12": + compile_kernel_aot_test1(arch=ti.dx12) else: assert False diff --git a/tests/python/test_api.py b/tests/python/test_api.py index 7f90cb5831770..dab6a7f927e04 100644 --- a/tests/python/test_api.py +++ b/tests/python/test_api.py @@ -72,22 +72,23 @@ def _get_expected_matrix_apis(): 'atomic_sub', 'atomic_xor', 'axes', 'bit_cast', 'bit_shr', 'block_local', 'cache_read_only', 'cast', 'cc', 'ceil', 'cos', 'cpu', 'cuda', 'data_oriented', 'dataclass', 'deactivate', 'deactivate_all_snodes', - 'dx11', 'eig', 'exp', 'experimental', 'extension', 'f16', 'f32', 'f64', - 'field', 'float16', 'float32', 'float64', 'floor', 'func', 'get_addr', - 'get_compute_stream_device_time_elapsed_us', 'global_thread_idx', 'gpu', - 'graph', 'grouped', 'hex_to_rgb', 'i', 'i16', 'i32', 'i64', 'i8', 'ij', - 'ijk', 'ijkl', 'ijl', 'ik', 'ikl', 'il', 'init', 'int16', 'int32', 'int64', - 'int8', 'is_active', 'is_logging_effective', 'j', 'jk', 'jkl', 'jl', 'k', - 'kernel', 'kl', 'l', 'lang', 'length', 'linalg', 'log', 'loop_config', - 'math', 'max', 'mesh_local', 'mesh_patch_idx', 'metal', 'min', 'ndarray', - 'ndrange', 'no_activate', 'one', 'opengl', 'polar_decompose', 'pow', - 'profiler', 'randn', 'random', 'raw_div', 'raw_mod', 'ref', - 'rescale_index', 'reset', 'rgb_to_hex', 'root', 'round', 'rsqrt', 'select', - 'set_logging_level', 'simt', 'sin', 'solve', 'sparse_matrix_builder', - 'sqrt', 'static', 'static_assert', 'static_print', 'stop_grad', 'svd', - 'swizzle_generator', 'sym_eig', 'sync', 'tan', 
'tanh', 'template', 'tools', - 'types', 'u16', 'u32', 'u64', 'u8', 'ui', 'uint16', 'uint32', 'uint64', - 'uint8', 'vulkan', 'wasm', 'x64', 'x86_64', 'zero' + 'dx11', 'dx12', 'eig', 'exp', 'experimental', 'extension', 'f16', 'f32', + 'f64', 'field', 'float16', 'float32', 'float64', 'floor', 'func', + 'get_addr', 'get_compute_stream_device_time_elapsed_us', + 'global_thread_idx', 'gpu', 'graph', 'grouped', 'hex_to_rgb', 'i', 'i16', + 'i32', 'i64', 'i8', 'ij', 'ijk', 'ijkl', 'ijl', 'ik', 'ikl', 'il', 'init', + 'int16', 'int32', 'int64', 'int8', 'is_active', 'is_logging_effective', + 'j', 'jk', 'jkl', 'jl', 'k', 'kernel', 'kl', 'l', 'lang', 'length', + 'linalg', 'log', 'loop_config', 'math', 'max', 'mesh_local', + 'mesh_patch_idx', 'metal', 'min', 'ndarray', 'ndrange', 'no_activate', + 'one', 'opengl', 'polar_decompose', 'pow', 'profiler', 'randn', 'random', + 'raw_div', 'raw_mod', 'ref', 'rescale_index', 'reset', 'rgb_to_hex', + 'root', 'round', 'rsqrt', 'select', 'set_logging_level', 'simt', 'sin', + 'solve', 'sparse_matrix_builder', 'sqrt', 'static', 'static_assert', + 'static_print', 'stop_grad', 'svd', 'swizzle_generator', 'sym_eig', 'sync', + 'tan', 'tanh', 'template', 'tools', 'types', 'u16', 'u32', 'u64', 'u8', + 'ui', 'uint16', 'uint32', 'uint64', 'uint8', 'vulkan', 'wasm', 'x64', + 'x86_64', 'zero' ] user_api[ti.ad] = [ 'FwdMode', 'Tape', 'clear_all_gradients', 'grad_for', 'grad_replaced', diff --git a/tests/test_config.json b/tests/test_config.json index 14e4cace901f5..7694b932613df 100644 --- a/tests/test_config.json +++ b/tests/test_config.json @@ -8,6 +8,10 @@ ["cpp", "aot", "python_scripts", "kernel_aot_test1.py"], "--arch=cuda" ], + "LlvmAotTest.DX12Kernel": [ + ["cpp", "aot", "python_scripts", "kernel_aot_test1.py"], + "--arch=dx12" + ], "LlvmAotTest.CpuField": [ ["cpp", "aot", "python_scripts", "field_aot_test.py"], "--arch=cpu"