diff --git a/cmake/TaichiCXXFlags.cmake b/cmake/TaichiCXXFlags.cmake index a2e555d22b44d..aceaae0428c57 100644 --- a/cmake/TaichiCXXFlags.cmake +++ b/cmake/TaichiCXXFlags.cmake @@ -20,7 +20,8 @@ endif () # Do not enable lto for APPLE since it made linking extremely slow. if (WIN32) if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto=thin") + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} -flto=thin") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -flto=thin") endif() endif() diff --git a/cmake/TaichiCore.cmake b/cmake/TaichiCore.cmake index 1bcafcf015530..c9584aa66f7df 100644 --- a/cmake/TaichiCore.cmake +++ b/cmake/TaichiCore.cmake @@ -372,24 +372,21 @@ add_subdirectory(external/SPIRV-Tools) # https://github.com/KhronosGroup/SPIRV-Tools/issues/1569#issuecomment-390250792 target_link_libraries(${CORE_LIBRARY_NAME} PRIVATE SPIRV-Tools-opt ${SPIRV_TOOLS}) +target_include_directories(${CORE_LIBRARY_NAME} PRIVATE external/SPIRV-Headers/include) +target_include_directories(${CORE_LIBRARY_NAME} PRIVATE external/SPIRV-Reflect) + +add_subdirectory(taichi/runtime/gfx) +target_link_libraries(${CORE_LIBRARY_NAME} PRIVATE gfx_runtime) + +# Vulkan Device API if (TI_WITH_VULKAN) include_directories(SYSTEM external/Vulkan-Headers/include) include_directories(SYSTEM external/volk) - target_include_directories(${CORE_LIBRARY_NAME} PRIVATE external/SPIRV-Headers/include) - target_include_directories(${CORE_LIBRARY_NAME} PRIVATE external/SPIRV-Reflect) - # By specifying SYSTEM, we suppressed the warnings from third-party headers. target_include_directories(${CORE_LIBRARY_NAME} SYSTEM PRIVATE external/VulkanMemoryAllocator/include) - if (LINUX) - # shaderc requires pthread - set(THREADS_PREFER_PTHREAD_FLAG ON) - find_package(Threads REQUIRED) - target_link_libraries(${CORE_LIBRARY_NAME} PRIVATE Threads::Threads) - endif() - if (APPLE) find_library(MOLTEN_VK libMoltenVK.dylib PATHS $HOMEBREW_CELLAR/molten-vk $VULKAN_SDK REQUIRED) configure_file(${MOLTEN_VK} ${CMAKE_BINARY_DIR}/libMoltenVK.dylib COPYONLY) @@ -398,9 +395,6 @@ if (TI_WITH_VULKAN) install(FILES ${CMAKE_BINARY_DIR}/libMoltenVK.dylib DESTINATION ${INSTALL_LIB_DIR}/runtime) endif() endif() - - add_subdirectory(taichi/runtime/vulkan) - target_link_libraries(${CORE_LIBRARY_NAME} PRIVATE vulkan_runtime) endif () diff --git a/taichi/aot/module_loader.cpp b/taichi/aot/module_loader.cpp index 908cc60db02c1..6526dfbc57969 100644 --- a/taichi/aot/module_loader.cpp +++ b/taichi/aot/module_loader.cpp @@ -1,6 +1,6 @@ #include "taichi/aot/module_loader.h" -#include "taichi/backends/vulkan/aot_module_loader_impl.h" +#include "taichi/runtime/gfx/aot_module_loader_impl.h" #include "taichi/backends/metal/aot_module_loader_impl.h" namespace taichi { @@ -32,19 +32,18 @@ Kernel *KernelTemplate::get_kernel( std::unique_ptr Module::load(Arch arch, std::any mod_params) { if (arch == Arch::vulkan) { #ifdef TI_WITH_VULKAN - return vulkan::make_aot_module(mod_params); -#else - TI_NOT_IMPLEMENTED + return gfx::make_aot_module(mod_params, arch); +#endif + } else if (arch == Arch::dx11) { +#ifdef TI_WITH_DX11 + return gfx::make_aot_module(mod_params, arch); #endif } else if (arch == Arch::metal) { #ifdef TI_WITH_METAL return metal::make_aot_module(mod_params); -#else - TI_NOT_IMPLEMENTED #endif - } else { - TI_NOT_IMPLEMENTED; } + TI_NOT_IMPLEMENTED; } Kernel *Module::get_kernel(const std::string &name) { diff --git a/taichi/backends/dx/dx_device.cpp b/taichi/backends/dx/dx_device.cpp index 436a40c221aee..27746b68e0f1f 100644 --- a/taichi/backends/dx/dx_device.cpp +++ b/taichi/backends/dx/dx_device.cpp @@ -22,7 +22,7 @@ void dump_buffer(ID3D11Device *device, void check_dx_error(HRESULT hr, const char *msg) { if (!SUCCEEDED(hr)) { - TI_ERROR("Error in {}: {}", msg, hr); + TI_ERROR("Error in {}: {:x}", msg, uint32_t(hr)); } } @@ -593,16 +593,21 @@ DeviceAllocation Dx11Device::allocate_memory(const AllocParams ¶ms) { void Dx11Device::dealloc_memory(DeviceAllocation handle) { uint32_t alloc_id = handle.alloc_id; - if (alloc_id_to_buffer_.count(alloc_id) == 0) - return; + if (alloc_id_to_buffer_.find(alloc_id) == alloc_id_to_buffer_.end()) + TI_ERROR("Invalid handle, possible double free?"); ID3D11Buffer *buf = alloc_id_to_buffer_[alloc_id]; buf->Release(); alloc_id_to_buffer_.erase(alloc_id); ID3D11UnorderedAccessView *uav = alloc_id_to_uav_[alloc_id]; uav->Release(); - ID3D11Buffer *cpucopy = alloc_id_to_cpucopy_[alloc_id]; - if (cpucopy) - cpucopy->Release(); + if (alloc_id_to_cpucopy_.find(alloc_id) != alloc_id_to_cpucopy_.end()) { + alloc_id_to_cpucopy_[alloc_id]->Release(); + alloc_id_to_cpucopy_.erase(alloc_id); + } + if (alloc_id_to_cb_copy_.find(alloc_id) != alloc_id_to_cb_copy_.end()) { + alloc_id_to_cb_copy_[alloc_id]->Release(); + alloc_id_to_cb_copy_.erase(alloc_id); + } alloc_id_to_uav_.erase(alloc_id); } @@ -724,10 +729,9 @@ ID3D11UnorderedAccessView *Dx11Device::alloc_id_to_uav(uint32_t alloc_id) { } ID3D11Buffer *Dx11Device::create_or_get_cb_buffer(uint32_t alloc_id) { - if (alloc_id_to_cb_copy_.count(alloc_id) > 0) { + if (alloc_id_to_cb_copy_.find(alloc_id) != alloc_id_to_cb_copy_.end()) { return alloc_id_to_cb_copy_[alloc_id]; } - assert(alloc_id_to_buffer_.count(alloc_id) > 0); ID3D11Buffer *buf = alloc_id_to_buffer_[alloc_id]; ID3D11Buffer *cb_buf; HRESULT hr = create_constant_buffer_copy(device_, buf, &cb_buf); diff --git a/taichi/backends/dx/dx_program.cpp b/taichi/backends/dx/dx_program.cpp index 1b76bdeffde9d..f41617536d3eb 100644 --- a/taichi/backends/dx/dx_program.cpp +++ b/taichi/backends/dx/dx_program.cpp @@ -3,18 +3,20 @@ #include "taichi/backends/dx/dx_program.h" #include "taichi/backends/dx/dx_device.h" -#include "taichi/backends/vulkan/snode_tree_manager.h" +#include "taichi/runtime/gfx/aot_module_builder_impl.h" +#include "taichi/runtime/gfx/snode_tree_manager.h" +#include "taichi/runtime/gfx/aot_module_loader_impl.h" namespace taichi { namespace lang { namespace directx11 { FunctionType compile_to_executable(Kernel *kernel, - vulkan::VkRuntime *runtime, - vulkan::SNodeTreeManager *snode_tree_mgr) { + gfx::GfxRuntime *runtime, + gfx::SNodeTreeManager *snode_tree_mgr) { auto handle = runtime->register_taichi_kernel( - std::move(vulkan::run_codegen(kernel, runtime->get_ti_device(), - snode_tree_mgr->get_compiled_structs()))); + std::move(gfx::run_codegen(kernel, runtime->get_ti_device(), + snode_tree_mgr->get_compiled_structs()))); return [runtime, handle](RuntimeContext &ctx) { runtime->launch_kernel(handle, &ctx); }; @@ -40,28 +42,52 @@ void Dx11ProgramImpl::materialize_runtime(MemoryPool *memory_pool, device_ = std::make_shared(); - vulkan::VkRuntime::Params params; + gfx::GfxRuntime::Params params; params.host_result_buffer = *result_buffer_ptr; params.device = device_.get(); - runtime_ = std::make_unique(std::move(params)); - snode_tree_mgr_ = std::make_unique(runtime_.get()); + runtime_ = std::make_unique(std::move(params)); + snode_tree_mgr_ = std::make_unique(runtime_.get()); } -void Dx11ProgramImpl::synchronize() { - TI_NOT_IMPLEMENTED; +void Dx11ProgramImpl::compile_snode_tree_types(SNodeTree *tree) { + if (runtime_) { + snode_tree_mgr_->materialize_snode_tree(tree); + } else { + gfx::CompiledSNodeStructs compiled_structs = + gfx::compile_snode_structs(*tree->root()); + aot_compiled_snode_structs_.push_back(compiled_structs); + } } void Dx11ProgramImpl::materialize_snode_tree(SNodeTree *tree, - uint64 *result_buffer_ptr) { + uint64 *result_buffer) { snode_tree_mgr_->materialize_snode_tree(tree); } std::unique_ptr Dx11ProgramImpl::make_aot_module_builder() { - return nullptr; + if (runtime_) { + return std::make_unique( + snode_tree_mgr_->get_compiled_structs(), Arch::dx11); + } else { + return std::make_unique( + aot_compiled_snode_structs_, Arch::dx11); + } } -void Dx11ProgramImpl::destroy_snode_tree(SNodeTree *snode_tree) { - TI_NOT_IMPLEMENTED; +DeviceAllocation Dx11ProgramImpl::allocate_memory_ndarray( + std::size_t alloc_size, + uint64 *result_buffer) { + return get_compute_device()->allocate_memory( + {alloc_size, /*host_write=*/false, /*host_read=*/false, + /*export_sharing=*/false}); +} + +std::unique_ptr Dx11ProgramImpl::make_aot_kernel(Kernel &kernel) { + spirv::lower(&kernel); + std::vector compiled_structs; + gfx::GfxRuntime::RegisterParams kparams = + gfx::run_codegen(&kernel, get_compute_device(), compiled_structs); + return std::make_unique(runtime_.get(), std::move(kparams)); } } // namespace lang diff --git a/taichi/backends/dx/dx_program.h b/taichi/backends/dx/dx_program.h index 603ee63abc3c4..1ae5522e563ba 100644 --- a/taichi/backends/dx/dx_program.h +++ b/taichi/backends/dx/dx_program.h @@ -3,8 +3,8 @@ #ifdef TI_WITH_DX11 #include "taichi/backends/dx/dx_device.h" -#include "taichi/runtime/vulkan/runtime.h" -#include "taichi/backends/vulkan/snode_tree_manager.h" +#include "taichi/runtime/gfx/runtime.h" +#include "taichi/runtime/gfx/snode_tree_manager.h" #include "taichi/program/program_impl.h" namespace taichi { @@ -13,26 +13,59 @@ namespace lang { class Dx11ProgramImpl : public ProgramImpl { public: Dx11ProgramImpl(CompileConfig &config); - FunctionType compile(Kernel *kernel, OffloadedStmt *offloaded) override; + std::size_t get_snode_num_dynamically_allocated( SNode *snode, uint64 *result_buffer) override { - return 0; + return 0; // TODO: support sparse } - std::unique_ptr make_aot_module_builder(); + + void compile_snode_tree_types(SNodeTree *tree) override; + void materialize_runtime(MemoryPool *memory_pool, KernelProfilerBase *profiler, uint64 **result_buffer_ptr) override; - virtual void materialize_snode_tree(SNodeTree *tree, - uint64 *result_buffer_ptr) override; - virtual void destroy_snode_tree(SNodeTree *snode_tree) override; - void synchronize() override; + + void materialize_snode_tree(SNodeTree *tree, uint64 *result_buffer) override; + + void synchronize() override { + runtime_->synchronize(); + } + + StreamSemaphore flush() override { + return runtime_->flush(); + } + + std::unique_ptr make_aot_module_builder() override; + + void destroy_snode_tree(SNodeTree *snode_tree) override { + TI_ASSERT(snode_tree_mgr_ != nullptr); + snode_tree_mgr_->destroy_snode_tree(snode_tree); + } + + DeviceAllocation allocate_memory_ndarray(std::size_t alloc_size, + uint64 *result_buffer) override; + + Device *get_compute_device() override { + return device_.get(); + } + + Device *get_graphics_device() override { + return device_.get(); + } + + DevicePtr get_snode_tree_device_ptr(int tree_id) override { + return snode_tree_mgr_->get_snode_tree_device_ptr(tree_id); + } + + std::unique_ptr make_aot_kernel(Kernel &kernel) override; private: std::shared_ptr device_{nullptr}; - std::unique_ptr runtime_{nullptr}; - std::unique_ptr snode_tree_mgr_{nullptr}; + std::unique_ptr runtime_{nullptr}; + std::unique_ptr snode_tree_mgr_{nullptr}; + std::vector aot_compiled_snode_structs_; }; } // namespace lang diff --git a/taichi/backends/opengl/opengl_device.cpp b/taichi/backends/opengl/opengl_device.cpp index b399cbb12e9ab..0da96f76d3685 100644 --- a/taichi/backends/opengl/opengl_device.cpp +++ b/taichi/backends/opengl/opengl_device.cpp @@ -197,7 +197,7 @@ void GLResourceBinder::rw_buffer(uint32_t set, DeviceAllocation alloc) { TI_ASSERT_INFO(set == 0, "OpenGL only supports set = 0, requested set = {}", set); - binding_map_[binding] = alloc.alloc_id; + ssbo_binding_map_[binding] = alloc.alloc_id; } void GLResourceBinder::buffer(uint32_t set, @@ -211,7 +211,9 @@ void GLResourceBinder::buffer(uint32_t set, void GLResourceBinder::buffer(uint32_t set, uint32_t binding, DeviceAllocation alloc) { - rw_buffer(set, binding, alloc); + TI_ASSERT_INFO(set == 0, "OpenGL only supports set = 0, requested set = {}", + set); + ubo_binding_map_[binding] = alloc.alloc_id; } void GLResourceBinder::image(uint32_t set, @@ -295,10 +297,17 @@ void GLCommandList::bind_pipeline(Pipeline *p) { void GLCommandList::bind_resources(ResourceBinder *_binder) { GLResourceBinder *binder = static_cast(_binder); - for (auto &[binding, buffer] : binder->binding_map()) { + for (auto &[binding, buffer] : binder->ssbo_binding_map()) { + auto cmd = std::make_unique(); + cmd->buffer = buffer; + cmd->index = binding; + recorded_commands_.push_back(std::move(cmd)); + } + for (auto &[binding, buffer] : binder->ubo_binding_map()) { auto cmd = std::make_unique(); cmd->buffer = buffer; cmd->index = binding; + cmd->target = GL_UNIFORM_BUFFER; recorded_commands_.push_back(std::move(cmd)); } } @@ -682,7 +691,7 @@ void GLCommandList::CmdBindPipeline::execute() { } void GLCommandList::CmdBindBufferToIndex::execute() { - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, index, buffer); + glBindBufferBase(target, index, buffer); check_opengl_error("glBindBufferBase"); } diff --git a/taichi/backends/opengl/opengl_device.h b/taichi/backends/opengl/opengl_device.h index 99eb1cba77eb5..7ab1e8c6dc185 100644 --- a/taichi/backends/opengl/opengl_device.h +++ b/taichi/backends/opengl/opengl_device.h @@ -53,12 +53,17 @@ class GLResourceBinder : public ResourceBinder { // index_width = 2 -> uint16 index void index_buffer(DevicePtr ptr, size_t index_width) override; - const std::unordered_map &binding_map() { - return binding_map_; + const std::unordered_map &ssbo_binding_map() { + return ssbo_binding_map_; + } + + const std::unordered_map &ubo_binding_map() { + return ubo_binding_map_; } private: - std::unordered_map binding_map_; + std::unordered_map ssbo_binding_map_; + std::unordered_map ubo_binding_map_; }; class GLPipeline : public Pipeline { @@ -141,6 +146,7 @@ class GLCommandList : public CommandList { struct CmdBindBufferToIndex : public Cmd { GLuint buffer{0}; GLuint index{0}; + GLenum target{GL_SHADER_STORAGE_BUFFER}; void execute() override; }; diff --git a/taichi/backends/vulkan/aot_module_loader_impl.h b/taichi/backends/vulkan/aot_module_loader_impl.h deleted file mode 100644 index 16230e1411306..0000000000000 --- a/taichi/backends/vulkan/aot_module_loader_impl.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "taichi/backends/vulkan/aot_utils.h" -#include "taichi/runtime/vulkan/runtime.h" -#include "taichi/codegen/spirv/kernel_utils.h" -#include "taichi/aot/module_builder.h" -#include "taichi/aot/module_loader.h" -#include "taichi/backends/vulkan/aot_module_builder_impl.h" -#include "taichi/backends/vulkan/vulkan_graph_data.h" - -namespace taichi { -namespace lang { -namespace vulkan { -struct TI_DLL_EXPORT AotModuleParams { - std::string module_path; - VkRuntime *runtime{nullptr}; -}; - -TI_DLL_EXPORT std::unique_ptr make_aot_module(std::any mod_params); - -} // namespace vulkan -} // namespace lang -} // namespace taichi diff --git a/taichi/backends/vulkan/vulkan_program.cpp b/taichi/backends/vulkan/vulkan_program.cpp index cddb446ae469f..d7cac58be11b8 100644 --- a/taichi/backends/vulkan/vulkan_program.cpp +++ b/taichi/backends/vulkan/vulkan_program.cpp @@ -1,8 +1,8 @@ #include "taichi/backends/vulkan/vulkan_program.h" -#include "taichi/backends/vulkan/aot_module_builder_impl.h" -#include "taichi/backends/vulkan/snode_tree_manager.h" -#include "taichi/backends/vulkan/aot_module_loader_impl.h" +#include "taichi/runtime/gfx/aot_module_builder_impl.h" +#include "taichi/runtime/gfx/snode_tree_manager.h" +#include "taichi/runtime/gfx/aot_module_loader_impl.h" #if !defined(ANDROID) && !defined(TI_EMSCRIPTENED) #include "GLFW/glfw3.h" @@ -69,11 +69,11 @@ VulkanProgramImpl::VulkanProgramImpl(CompileConfig &config) } FunctionType compile_to_executable(Kernel *kernel, - VkRuntime *runtime, - SNodeTreeManager *snode_tree_mgr) { + gfx::GfxRuntime *runtime, + gfx::SNodeTreeManager *snode_tree_mgr) { auto handle = runtime->register_taichi_kernel( - run_codegen(kernel, runtime->get_ti_device(), - snode_tree_mgr->get_compiled_structs())); + gfx::run_codegen(kernel, runtime->get_ti_device(), + snode_tree_mgr->get_compiled_structs())); return [runtime, handle](RuntimeContext &ctx) { runtime->launch_kernel(handle, &ctx); }; @@ -144,20 +144,20 @@ void VulkanProgramImpl::materialize_runtime(MemoryPool *memory_pool, embedded_device_ = std::make_unique(evd_params); - vulkan::VkRuntime::Params params; + gfx::GfxRuntime::Params params; params.host_result_buffer = *result_buffer_ptr; params.device = embedded_device_->device(); - vulkan_runtime_ = std::make_unique(std::move(params)); + vulkan_runtime_ = std::make_unique(std::move(params)); snode_tree_mgr_ = - std::make_unique(vulkan_runtime_.get()); + std::make_unique(vulkan_runtime_.get()); } void VulkanProgramImpl::compile_snode_tree_types(SNodeTree *tree) { if (vulkan_runtime_) { snode_tree_mgr_->materialize_snode_tree(tree); } else { - CompiledSNodeStructs compiled_structs = - vulkan::compile_snode_structs(*tree->root()); + gfx::CompiledSNodeStructs compiled_structs = + gfx::compile_snode_structs(*tree->root()); aot_compiled_snode_structs_.push_back(compiled_structs); } } @@ -169,10 +169,11 @@ void VulkanProgramImpl::materialize_snode_tree(SNodeTree *tree, std::unique_ptr VulkanProgramImpl::make_aot_module_builder() { if (vulkan_runtime_) { - return std::make_unique( - snode_tree_mgr_->get_compiled_structs()); + return std::make_unique( + snode_tree_mgr_->get_compiled_structs(), Arch::vulkan); } else { - return std::make_unique(aot_compiled_snode_structs_); + return std::make_unique( + aot_compiled_snode_structs_, Arch::vulkan); } } @@ -187,11 +188,11 @@ DeviceAllocation VulkanProgramImpl::allocate_memory_ndarray( std::unique_ptr VulkanProgramImpl::make_aot_kernel( Kernel &kernel) { spirv::lower(&kernel); - std::vector compiled_structs; - VkRuntime::RegisterParams kparams = - run_codegen(&kernel, get_compute_device(), compiled_structs); - return std::make_unique(vulkan_runtime_.get(), - std::move(kparams)); + std::vector compiled_structs; + gfx::GfxRuntime::RegisterParams kparams = + gfx::run_codegen(&kernel, get_compute_device(), compiled_structs); + return std::make_unique(vulkan_runtime_.get(), + std::move(kparams)); } VulkanProgramImpl::~VulkanProgramImpl() { diff --git a/taichi/backends/vulkan/vulkan_program.h b/taichi/backends/vulkan/vulkan_program.h index b8d7a820fb7be..830910ebcc858 100644 --- a/taichi/backends/vulkan/vulkan_program.h +++ b/taichi/backends/vulkan/vulkan_program.h @@ -6,8 +6,8 @@ #include "taichi/backends/vulkan/vulkan_device_creator.h" #include "taichi/backends/vulkan/vulkan_utils.h" #include "taichi/backends/vulkan/vulkan_loader.h" -#include "taichi/runtime/vulkan/runtime.h" -#include "taichi/backends/vulkan/snode_tree_manager.h" +#include "taichi/runtime/gfx/runtime.h" +#include "taichi/runtime/gfx/snode_tree_manager.h" #include "taichi/backends/vulkan/vulkan_device.h" #include "vk_mem_alloc.h" @@ -35,7 +35,7 @@ class VulkanProgramImpl : public ProgramImpl { std::size_t get_snode_num_dynamically_allocated( SNode *snode, uint64 *result_buffer) override { - return 0; // TODO: support sparse in vulkan + return 0; // TODO: support sparse } void compile_snode_tree_types(SNodeTree *tree) override; @@ -56,7 +56,7 @@ class VulkanProgramImpl : public ProgramImpl { std::unique_ptr make_aot_module_builder() override; - virtual void destroy_snode_tree(SNodeTree *snode_tree) override { + void destroy_snode_tree(SNodeTree *snode_tree) override { TI_ASSERT(snode_tree_mgr_ != nullptr); snode_tree_mgr_->destroy_snode_tree(snode_tree); } @@ -88,8 +88,8 @@ class VulkanProgramImpl : public ProgramImpl { private: std::unique_ptr embedded_device_{nullptr}; - std::unique_ptr vulkan_runtime_{nullptr}; - std::unique_ptr snode_tree_mgr_{nullptr}; + std::unique_ptr vulkan_runtime_{nullptr}; + std::unique_ptr snode_tree_mgr_{nullptr}; std::vector aot_compiled_snode_structs_; }; } // namespace lang diff --git a/taichi/program/program.cpp b/taichi/program/program.cpp index 05ae5f128715d..0f02447d2b15e 100644 --- a/taichi/program/program.cpp +++ b/taichi/program/program.cpp @@ -193,7 +193,8 @@ void Program::materialize_runtime() { } void Program::destroy_snode_tree(SNodeTree *snode_tree) { - TI_ASSERT(arch_uses_llvm(config.arch) || config.arch == Arch::vulkan); + TI_ASSERT(arch_uses_llvm(config.arch) || config.arch == Arch::vulkan || + config.arch == Arch::dx11); program_impl_->destroy_snode_tree(snode_tree); free_snode_tree_ids_.push(snode_tree->id()); } diff --git a/taichi/python/export_misc.cpp b/taichi/python/export_misc.cpp index 2ede85517e551..44e1f88c0e72c 100644 --- a/taichi/python/export_misc.cpp +++ b/taichi/python/export_misc.cpp @@ -5,7 +5,7 @@ #include "taichi/backends/metal/api.h" #include "taichi/runtime/opengl/opengl_api.h" -#include "taichi/runtime/vulkan/runtime.h" +#include "taichi/runtime/gfx/runtime.h" #include "taichi/backends/dx/dx_api.h" #include "taichi/common/core.h" #include "taichi/common/interface.h" diff --git a/taichi/runtime/gfx/CMakeLists.txt b/taichi/runtime/gfx/CMakeLists.txt new file mode 100644 index 0000000000000..bc5bcdf03b5fb --- /dev/null +++ b/taichi/runtime/gfx/CMakeLists.txt @@ -0,0 +1,16 @@ +# ./taichi/runtime/gfx/CMakeLists.txt + +add_library(gfx_runtime) +target_sources(gfx_runtime + PRIVATE + runtime.cpp + snode_tree_manager.cpp + aot_module_builder_impl.cpp + aot_module_loader_impl.cpp + ) +target_include_directories(gfx_runtime + PRIVATE + ${PROJECT_SOURCE_DIR}/external/SPIRV-Tools/include + ${PROJECT_SOURCE_DIR}/external/eigen + ${PROJECT_SOURCE_DIR}/external/FP16/include + ) diff --git a/taichi/backends/vulkan/vulkan_graph_data.h b/taichi/runtime/gfx/aot_graph_data.h similarity index 55% rename from taichi/backends/vulkan/vulkan_graph_data.h rename to taichi/runtime/gfx/aot_graph_data.h index 6fa3cafc1e3e0..b9d8b4315ab65 100644 --- a/taichi/backends/vulkan/vulkan_graph_data.h +++ b/taichi/runtime/gfx/aot_graph_data.h @@ -1,12 +1,12 @@ #pragma once -#include "taichi/runtime/vulkan/runtime.h" +#include "taichi/runtime/gfx/runtime.h" namespace taichi { namespace lang { -namespace vulkan { +namespace gfx { class KernelImpl : public aot::Kernel { public: - explicit KernelImpl(VkRuntime *runtime, VkRuntime::RegisterParams &¶ms) + explicit KernelImpl(GfxRuntime *runtime, GfxRuntime::RegisterParams &¶ms) : runtime_(runtime), params_(std::move(params)) { handle_ = runtime_->register_taichi_kernel(params_); } @@ -15,15 +15,15 @@ class KernelImpl : public aot::Kernel { runtime_->launch_kernel(handle_, ctx); } - const VkRuntime::RegisterParams ¶ms() { + const GfxRuntime::RegisterParams ¶ms() { return params_; } private: - VkRuntime *const runtime_; - VkRuntime::KernelHandle handle_; - const VkRuntime::RegisterParams params_; + GfxRuntime *const runtime_; + GfxRuntime::KernelHandle handle_; + const GfxRuntime::RegisterParams params_; }; -} // namespace vulkan +} // namespace gfx } // namespace lang } // namespace taichi diff --git a/taichi/backends/vulkan/aot_module_builder_impl.cpp b/taichi/runtime/gfx/aot_module_builder_impl.cpp similarity index 93% rename from taichi/backends/vulkan/aot_module_builder_impl.cpp rename to taichi/runtime/gfx/aot_module_builder_impl.cpp index ed03800098bd1..48e3b7964de03 100644 --- a/taichi/backends/vulkan/aot_module_builder_impl.cpp +++ b/taichi/runtime/gfx/aot_module_builder_impl.cpp @@ -1,15 +1,15 @@ -#include "taichi/backends/vulkan/aot_module_builder_impl.h" +#include "taichi/runtime/gfx/aot_module_builder_impl.h" #include #include #include "taichi/aot/module_data.h" #include "taichi/codegen/spirv/spirv_codegen.h" -#include "taichi/backends/vulkan/vulkan_graph_data.h" +#include "taichi/runtime/gfx/aot_graph_data.h" namespace taichi { namespace lang { -namespace vulkan { +namespace gfx { namespace { class AotDataConverter { @@ -66,7 +66,6 @@ class AotDataConverter { aot::CompiledOffloadedTask res{}; res.type = offloaded_task_type_name(in.task_type); res.name = in.name; - // TODO: update range_hint after ndarray is supported on vulkan. if (in.range_for_attribs && in.range_for_attribs->const_begin && in.range_for_attribs->const_end) { res.range_hint = std::to_string(in.range_for_attribs->end - @@ -98,9 +97,11 @@ class AotDataConverter { } // namespace AotModuleBuilderImpl::AotModuleBuilderImpl( - const std::vector &compiled_structs) - : compiled_structs_(compiled_structs) { - aot_target_device_ = std::make_unique(Arch::vulkan); + const std::vector &compiled_structs, + Arch device_api_backend) + : compiled_structs_(compiled_structs), + device_api_backend_(device_api_backend) { + aot_target_device_ = std::make_unique(device_api_backend_); if (!compiled_structs.empty()) { ti_aot_data_.root_buffer_size = compiled_structs[0].root_size; } @@ -120,7 +121,7 @@ std::string AotModuleBuilderImpl::write_spv_file( void AotModuleBuilderImpl::dump(const std::string &output_dir, const std::string &filename) const { TI_WARN_IF(!filename.empty(), - "Filename prefix is ignored on vulkan backend."); + "Filename prefix is ignored on Unified Device API backends."); const std::string bin_path = fmt::format("{}/metadata.tcb", output_dir); write_to_binary_file(ti_aot_data_, bin_path); @@ -199,6 +200,6 @@ void AotModuleBuilderImpl::add_per_backend_tmpl(const std::string &identifier, ti_aot_data_.spirv_codes.push_back(compiled.task_spirv_source_codes); } -} // namespace vulkan +} // namespace gfx } // namespace lang } // namespace taichi diff --git a/taichi/backends/vulkan/aot_module_builder_impl.h b/taichi/runtime/gfx/aot_module_builder_impl.h similarity index 85% rename from taichi/backends/vulkan/aot_module_builder_impl.h rename to taichi/runtime/gfx/aot_module_builder_impl.h index 40dc4157c06f4..a6eeaaa3af143 100644 --- a/taichi/backends/vulkan/aot_module_builder_impl.h +++ b/taichi/runtime/gfx/aot_module_builder_impl.h @@ -4,19 +4,20 @@ #include #include "taichi/aot/module_builder.h" -#include "taichi/backends/vulkan/aot_utils.h" -#include "taichi/runtime/vulkan/runtime.h" +#include "taichi/runtime/gfx/aot_utils.h" +#include "taichi/runtime/gfx/runtime.h" #include "taichi/codegen/spirv/snode_struct_compiler.h" #include "taichi/codegen/spirv/kernel_utils.h" namespace taichi { namespace lang { -namespace vulkan { +namespace gfx { class AotModuleBuilderImpl : public AotModuleBuilder { public: explicit AotModuleBuilderImpl( - const std::vector &compiled_structs); + const std::vector &compiled_structs, + Arch device_api_backend); void dump(const std::string &output_dir, const std::string &filename) const override; @@ -45,8 +46,10 @@ class AotModuleBuilderImpl : public AotModuleBuilder { const std::vector &compiled_structs_; TaichiAotData ti_aot_data_; std::unique_ptr aot_target_device_; + + Arch device_api_backend_; }; -} // namespace vulkan +} // namespace gfx } // namespace lang } // namespace taichi diff --git a/taichi/backends/vulkan/aot_module_loader_impl.cpp b/taichi/runtime/gfx/aot_module_loader_impl.cpp similarity index 85% rename from taichi/backends/vulkan/aot_module_loader_impl.cpp rename to taichi/runtime/gfx/aot_module_loader_impl.cpp index 4ea34de89fc7c..497636b5b7c2c 100644 --- a/taichi/backends/vulkan/aot_module_loader_impl.cpp +++ b/taichi/runtime/gfx/aot_module_loader_impl.cpp @@ -1,30 +1,30 @@ -#include "taichi/backends/vulkan/aot_module_loader_impl.h" +#include "taichi/runtime/gfx/aot_module_loader_impl.h" #include #include -#include "taichi/runtime/vulkan/runtime.h" +#include "taichi/runtime/gfx/runtime.h" #include "taichi/aot/graph_data.h" namespace taichi { namespace lang { -namespace vulkan { +namespace gfx { namespace { class FieldImpl : public aot::Field { public: - explicit FieldImpl(VkRuntime *runtime, const aot::CompiledFieldData &field) + explicit FieldImpl(GfxRuntime *runtime, const aot::CompiledFieldData &field) : runtime_(runtime), field_(field) { } private: - VkRuntime *const runtime_; + GfxRuntime *const runtime_; aot::CompiledFieldData field_; }; class AotModuleImpl : public aot::Module { public: - explicit AotModuleImpl(const AotModuleParams ¶ms) - : runtime_(params.runtime) { + explicit AotModuleImpl(const AotModuleParams ¶ms, Arch device_api_backend) + : runtime_(params.runtime), device_api_backend_(device_api_backend) { const std::string bin_path = fmt::format("{}/metadata.tcb", params.module_path); read_from_binary_file(ti_aot_data_, bin_path); @@ -63,7 +63,7 @@ class AotModuleImpl : public aot::Module { // Module metadata Arch arch() const override { - return Arch::vulkan; + return device_api_backend_; } uint64_t version() const override { TI_NOT_IMPLEMENTED; @@ -82,7 +82,7 @@ class AotModuleImpl : public aot::Module { } bool get_kernel_params_by_name(const std::string &name, - VkRuntime::RegisterParams &kernel) { + GfxRuntime::RegisterParams &kernel) { for (int i = 0; i < ti_aot_data_.kernels.size(); ++i) { // Offloaded task names encode more than the name of the function, but for // AOT, only use the name of the function which should be the first part @@ -102,7 +102,7 @@ class AotModuleImpl : public aot::Module { std::unique_ptr make_new_kernel( const std::string &name) override { - VkRuntime::RegisterParams kparams; + GfxRuntime::RegisterParams kparams; if (!get_kernel_params_by_name(name, kparams)) { TI_DEBUG("Failed to load kernel {}", name); return nullptr; @@ -139,16 +139,18 @@ class AotModuleImpl : public aot::Module { } TaichiAotData ti_aot_data_; - VkRuntime *runtime_{nullptr}; + GfxRuntime *runtime_{nullptr}; + Arch device_api_backend_; }; } // namespace -std::unique_ptr make_aot_module(std::any mod_params) { +std::unique_ptr make_aot_module(std::any mod_params, + Arch device_api_backend) { AotModuleParams params = std::any_cast(mod_params); - return std::make_unique(params); + return std::make_unique(params, device_api_backend); } -} // namespace vulkan +} // namespace gfx } // namespace lang } // namespace taichi diff --git a/taichi/runtime/gfx/aot_module_loader_impl.h b/taichi/runtime/gfx/aot_module_loader_impl.h new file mode 100644 index 0000000000000..6fec0a063bf24 --- /dev/null +++ b/taichi/runtime/gfx/aot_module_loader_impl.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include +#include + +#include "taichi/runtime/gfx/aot_utils.h" +#include "taichi/runtime/gfx/runtime.h" +#include "taichi/runtime/gfx/aot_module_builder_impl.h" +#include "taichi/runtime/gfx/aot_graph_data.h" +#include "taichi/codegen/spirv/kernel_utils.h" +#include "taichi/aot/module_builder.h" +#include "taichi/aot/module_loader.h" + +namespace taichi { +namespace lang { +namespace gfx { + +struct TI_DLL_EXPORT AotModuleParams { + std::string module_path; + GfxRuntime *runtime{nullptr}; +}; + +TI_DLL_EXPORT std::unique_ptr make_aot_module( + std::any mod_params, + Arch device_api_backend); + +} // namespace gfx +} // namespace lang +} // namespace taichi diff --git a/taichi/backends/vulkan/aot_utils.h b/taichi/runtime/gfx/aot_utils.h similarity index 84% rename from taichi/backends/vulkan/aot_utils.h rename to taichi/runtime/gfx/aot_utils.h index 5c00d4023efa8..e8c1f5b0ea150 100644 --- a/taichi/backends/vulkan/aot_utils.h +++ b/taichi/runtime/gfx/aot_utils.h @@ -7,10 +7,10 @@ namespace taichi { namespace lang { -namespace vulkan { +namespace gfx { /** - * AOT module data for the vulkan backend. + * AOT module data for the Unified Device API backend. */ struct TaichiAotData { // BufferMetaData metadata; @@ -22,6 +22,6 @@ struct TaichiAotData { TI_IO_DEF(kernels, fields, root_buffer_size); }; -} // namespace vulkan +} // namespace gfx } // namespace lang } // namespace taichi diff --git a/taichi/runtime/vulkan/runtime.cpp b/taichi/runtime/gfx/runtime.cpp similarity index 94% rename from taichi/runtime/vulkan/runtime.cpp rename to taichi/runtime/gfx/runtime.cpp index 3a4d199824a13..d47e19c71ce46 100644 --- a/taichi/runtime/vulkan/runtime.cpp +++ b/taichi/runtime/gfx/runtime.cpp @@ -1,4 +1,4 @@ -#include "taichi/runtime/vulkan/runtime.h" +#include "taichi/runtime/gfx/runtime.h" #include "taichi/program/program.h" #include @@ -18,28 +18,9 @@ namespace taichi { namespace lang { -namespace vulkan { +namespace gfx { namespace { -class StopWatch { - public: - StopWatch() : begin_(std::chrono::system_clock::now()) { - } - - int get_micros() { - typedef std::chrono::duration fsec; - - auto now = std::chrono::system_clock::now(); - - fsec fs = now - begin_; - begin_ = now; - auto d = std::chrono::duration_cast(fs); - return d.count(); - } - - private: - std::chrono::time_point begin_; -}; class HostDeviceContextBlitter { public: @@ -123,7 +104,7 @@ class HostDeviceContextBlitter { break; } } - TI_ERROR("Vulkan does not support arg type={}", + TI_ERROR("Device does not support arg type={}", PrimitiveType::get(arg.dtype).to_string()); } while (0); } @@ -227,7 +208,7 @@ class HostDeviceContextBlitter { continue; } } - TI_ERROR("Vulkan does not support return value type={}", + TI_ERROR("Device does not support return value type={}", data_type_name(PrimitiveType::get(ret.dtype))); } } @@ -268,7 +249,7 @@ constexpr size_t kGtmpBufferSize = 1024 * 1024; constexpr size_t kListGenBufferSize = 32 << 20; // Info for launching a compiled Taichi kernel, which consists of a series of -// Vulkan pipelines. +// Unified Device API pipelines. CompiledTaichiKernel::CompiledTaichiKernel(const Params &ti_params) : ti_kernel_attribs_(*ti_params.ti_kernel_attribs), @@ -372,14 +353,14 @@ void CompiledTaichiKernel::generate_command_list( } } -VkRuntime::VkRuntime(const Params ¶ms) +GfxRuntime::GfxRuntime(const Params ¶ms) : device_(params.device), host_result_buffer_(params.host_result_buffer) { TI_ASSERT(host_result_buffer_ != nullptr); current_cmdlist_pending_since_ = high_res_clock::now(); init_nonroot_buffers(); } -VkRuntime::~VkRuntime() { +GfxRuntime::~GfxRuntime() { synchronize(); { decltype(ti_kernels_) tmp; @@ -388,8 +369,8 @@ VkRuntime::~VkRuntime() { global_tmps_buffer_.reset(); } -VkRuntime::KernelHandle VkRuntime::register_taichi_kernel( - VkRuntime::RegisterParams reg_params) { +GfxRuntime::KernelHandle GfxRuntime::register_taichi_kernel( + GfxRuntime::RegisterParams reg_params) { CompiledTaichiKernel::Params params; params.ti_kernel_attribs = &(reg_params.kernel_attribs); params.num_snode_trees = reg_params.num_snode_trees; @@ -414,7 +395,7 @@ VkRuntime::KernelHandle VkRuntime::register_taichi_kernel( return res; } -void VkRuntime::launch_kernel(KernelHandle handle, RuntimeContext *host_ctx) { +void GfxRuntime::launch_kernel(KernelHandle handle, RuntimeContext *host_ctx) { auto *ti_kernel = ti_kernels_[handle.id_].get(); std::unique_ptr args_buffer{nullptr}, @@ -553,13 +534,13 @@ void VkRuntime::launch_kernel(KernelHandle handle, RuntimeContext *host_ctx) { } } -void VkRuntime::synchronize() { +void GfxRuntime::synchronize() { flush(); device_->wait_idle(); ctx_buffers_.clear(); } -StreamSemaphore VkRuntime::flush() { +StreamSemaphore GfxRuntime::flush() { StreamSemaphore sema; if (current_cmdlist_) { sema = device_->get_compute_stream()->submit(current_cmdlist_.get()); @@ -572,11 +553,11 @@ StreamSemaphore VkRuntime::flush() { return sema; } -Device *VkRuntime::get_ti_device() const { +Device *GfxRuntime::get_ti_device() const { return device_; } -void VkRuntime::init_nonroot_buffers() { +void GfxRuntime::init_nonroot_buffers() { global_tmps_buffer_ = device_->allocate_memory_unique( {kGtmpBufferSize, /*host_write=*/false, /*host_read=*/false, @@ -598,7 +579,7 @@ void VkRuntime::init_nonroot_buffers() { stream->submit_synced(cmdlist.get()); } -void VkRuntime::add_root_buffer(size_t root_buffer_size) { +void GfxRuntime::add_root_buffer(size_t root_buffer_size) { if (root_buffer_size == 0) { root_buffer_size = 4; // there might be empty roots } @@ -617,14 +598,14 @@ void VkRuntime::add_root_buffer(size_t root_buffer_size) { root_buffers_size_map_[root_buffers_.back().get()] = root_buffer_size; } -DeviceAllocation *VkRuntime::get_root_buffer(int id) const { +DeviceAllocation *GfxRuntime::get_root_buffer(int id) const { if (id >= root_buffers_.size()) { TI_ERROR("root buffer id {} not found", id); } return root_buffers_[id].get(); } -size_t VkRuntime::get_root_buffer_size(int id) const { +size_t GfxRuntime::get_root_buffer_size(int id) const { auto it = root_buffers_size_map_.find(root_buffers_[id].get()); if (id >= root_buffers_.size() || it == root_buffers_size_map_.end()) { TI_ERROR("root buffer id {} not found", id); @@ -632,7 +613,7 @@ size_t VkRuntime::get_root_buffer_size(int id) const { return it->second; } -VkRuntime::RegisterParams run_codegen( +GfxRuntime::RegisterParams run_codegen( Kernel *kernel, Device *device, const std::vector &compiled_structs) { @@ -647,12 +628,12 @@ VkRuntime::RegisterParams run_codegen( params.enable_spv_opt = kernel->program->config.external_optimization_level > 0; spirv::KernelCodegen codegen(params); - VkRuntime::RegisterParams res; + GfxRuntime::RegisterParams res; codegen.run(res.kernel_attribs, res.task_spirv_source_codes); res.num_snode_trees = compiled_structs.size(); return res; } -} // namespace vulkan +} // namespace gfx } // namespace lang } // namespace taichi diff --git a/taichi/runtime/vulkan/runtime.h b/taichi/runtime/gfx/runtime.h similarity index 93% rename from taichi/runtime/vulkan/runtime.h rename to taichi/runtime/gfx/runtime.h index c4f356783bcea..cbe1a48d758d1 100644 --- a/taichi/runtime/vulkan/runtime.h +++ b/taichi/runtime/gfx/runtime.h @@ -14,7 +14,7 @@ namespace taichi { namespace lang { -namespace vulkan { +namespace gfx { using namespace taichi::lang::spirv; @@ -73,20 +73,20 @@ class CompiledTaichiKernel { std::vector> pipelines_; }; -class TI_DLL_EXPORT VkRuntime { +class TI_DLL_EXPORT GfxRuntime { public: struct Params { uint64_t *host_result_buffer{nullptr}; Device *device{nullptr}; }; - explicit VkRuntime(const Params ¶ms); + explicit GfxRuntime(const Params ¶ms); // To make Pimpl + std::unique_ptr work - ~VkRuntime(); + ~GfxRuntime(); class KernelHandle { private: - friend class VkRuntime; + friend class GfxRuntime; int id_ = -1; }; @@ -113,7 +113,7 @@ class TI_DLL_EXPORT VkRuntime { size_t get_root_buffer_size(int id) const; private: - friend class taichi::lang::vulkan::SNodeTreeManager; + friend class taichi::lang::gfx::SNodeTreeManager; void init_nonroot_buffers(); @@ -135,11 +135,11 @@ class TI_DLL_EXPORT VkRuntime { std::unordered_map root_buffers_size_map_; }; -VkRuntime::RegisterParams run_codegen( +GfxRuntime::RegisterParams run_codegen( Kernel *kernel, Device *device, const std::vector &compiled_structs); -} // namespace vulkan +} // namespace gfx } // namespace lang } // namespace taichi diff --git a/taichi/backends/vulkan/snode_tree_manager.cpp b/taichi/runtime/gfx/snode_tree_manager.cpp similarity index 73% rename from taichi/backends/vulkan/snode_tree_manager.cpp rename to taichi/runtime/gfx/snode_tree_manager.cpp index b7d4816ae42d9..0ace8e561304d 100644 --- a/taichi/backends/vulkan/snode_tree_manager.cpp +++ b/taichi/runtime/gfx/snode_tree_manager.cpp @@ -1,17 +1,17 @@ -#include "taichi/backends/vulkan/snode_tree_manager.h" +#include "taichi/runtime/gfx/snode_tree_manager.h" -#include "taichi/runtime/vulkan/runtime.h" +#include "taichi/runtime/gfx/runtime.h" namespace taichi { namespace lang { -namespace vulkan { +namespace gfx { -SNodeTreeManager::SNodeTreeManager(VkRuntime *rtm) : runtime_(rtm) { +SNodeTreeManager::SNodeTreeManager(GfxRuntime *rtm) : runtime_(rtm) { } void SNodeTreeManager::materialize_snode_tree(SNodeTree *tree) { auto *const root = tree->root(); - CompiledSNodeStructs compiled_structs = vulkan::compile_snode_structs(*root); + CompiledSNodeStructs compiled_structs = compile_snode_structs(*root); runtime_->add_root_buffer(compiled_structs.root_size); compiled_snode_structs_.push_back(compiled_structs); } @@ -33,6 +33,6 @@ DevicePtr SNodeTreeManager::get_snode_tree_device_ptr(int tree_id) { return runtime_->root_buffers_[tree_id]->get_ptr(); } -} // namespace vulkan +} // namespace gfx } // namespace lang } // namespace taichi diff --git a/taichi/backends/vulkan/snode_tree_manager.h b/taichi/runtime/gfx/snode_tree_manager.h similarity index 79% rename from taichi/backends/vulkan/snode_tree_manager.h rename to taichi/runtime/gfx/snode_tree_manager.h index d946c308632b9..3add69e6c4a41 100644 --- a/taichi/backends/vulkan/snode_tree_manager.h +++ b/taichi/runtime/gfx/snode_tree_manager.h @@ -8,12 +8,12 @@ namespace taichi { namespace lang { -namespace vulkan { +namespace gfx { -class VkRuntime; +class GfxRuntime; /** - * @brief Manages the SNodeTrees for the Vulkan backend. + * @brief Manages the SNodeTrees for the underlying backend. * */ class SNodeTreeManager { @@ -21,7 +21,7 @@ class SNodeTreeManager { using CompiledSNodeStructs = taichi::lang::spirv::CompiledSNodeStructs; public: - explicit SNodeTreeManager(VkRuntime *rtm); + explicit SNodeTreeManager(GfxRuntime *rtm); const std::vector &get_compiled_structs() const { return compiled_snode_structs_; @@ -34,10 +34,10 @@ class SNodeTreeManager { DevicePtr get_snode_tree_device_ptr(int tree_id); private: - VkRuntime *const runtime_; + GfxRuntime *const runtime_; std::vector compiled_snode_structs_; }; -} // namespace vulkan +} // namespace gfx } // namespace lang } // namespace taichi diff --git a/taichi/runtime/opengl/opengl_api.cpp b/taichi/runtime/opengl/opengl_api.cpp index d4c0c93667592..daa6144e398bd 100644 --- a/taichi/runtime/opengl/opengl_api.cpp +++ b/taichi/runtime/opengl/opengl_api.cpp @@ -461,12 +461,12 @@ void DeviceCompiledTaichiKernel::launch(RuntimeContext &ctx, for (const auto &task : program_.tasks) { auto binder = compiled_pipeline_[i]->resource_binder(); auto &core_bufs = runtime->impl->core_bufs; - binder->buffer(0, static_cast(GLBufId::Runtime), core_bufs.runtime); + binder->rw_buffer(0, static_cast(GLBufId::Runtime), core_bufs.runtime); if (program_.used.buf_data) - binder->buffer(0, static_cast(GLBufId::Root), core_bufs.root); - binder->buffer(0, static_cast(GLBufId::Gtmp), core_bufs.gtmp); + binder->rw_buffer(0, static_cast(GLBufId::Root), core_bufs.root); + binder->rw_buffer(0, static_cast(GLBufId::Gtmp), core_bufs.gtmp); if (program_.args_buf_size || program_.ret_buf_size) - binder->buffer(0, static_cast(GLBufId::Args), *args_buf_); + binder->rw_buffer(0, static_cast(GLBufId::Args), *args_buf_); // TODO: properly assert and throw if we bind more than allowed SSBOs. // On most devices this number is 8. But I need to look up how // to query this information so currently this is thrown from OpenGl. @@ -475,9 +475,9 @@ void DeviceCompiledTaichiKernel::launch(RuntimeContext &ctx, DeviceAllocation *ptr = static_cast((void *)ctx.args[arg_id]); - binder->buffer(0, bind_id, *ptr); + binder->rw_buffer(0, bind_id, *ptr); } else { - binder->buffer(0, bind_id, ext_arr_bufs_[arg_id]); + binder->rw_buffer(0, bind_id, ext_arr_bufs_[arg_id]); } } diff --git a/taichi/runtime/vulkan/CMakeLists.txt b/taichi/runtime/vulkan/CMakeLists.txt deleted file mode 100644 index 00ecee7a09caf..0000000000000 --- a/taichi/runtime/vulkan/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -# ./taichi/runtime/vulkan/CMakeLists.txt - -add_library(vulkan_runtime) -target_sources(vulkan_runtime - PRIVATE - runtime.cpp - ) -target_include_directories(vulkan_runtime - PRIVATE - ${PROJECT_SOURCE_DIR}/external/SPIRV-Tools/include - ${PROJECT_SOURCE_DIR}/external/eigen - ${PROJECT_SOURCE_DIR}/external/FP16/include - ) diff --git a/tests/cpp/aot/aot_save_load_test.cpp b/tests/cpp/aot/aot_save_load_test.cpp index e0c0211fb195a..06d36a7515edb 100644 --- a/tests/cpp/aot/aot_save_load_test.cpp +++ b/tests/cpp/aot/aot_save_load_test.cpp @@ -7,8 +7,8 @@ #include "tests/cpp/program/test_program.h" #include "taichi/aot/graph_data.h" #include "taichi/program/graph_builder.h" +#include "taichi/runtime/gfx/aot_module_loader_impl.h" #ifdef TI_WITH_VULKAN -#include "taichi/backends/vulkan/aot_module_loader_impl.h" #include "taichi/backends/device.h" #include "taichi/backends/vulkan/vulkan_device.h" #include "taichi/backends/vulkan/vulkan_device_creator.h" @@ -121,7 +121,7 @@ using namespace lang; #ifdef TI_WITH_VULKAN [[maybe_unused]] static void write_devalloc( - taichi::lang::vulkan::VkRuntime *vulkan_runtime, + taichi::lang::gfx::GfxRuntime *vulkan_runtime, taichi::lang::DeviceAllocation &alloc, const void *data, size_t size) { @@ -132,7 +132,7 @@ using namespace lang; } [[maybe_unused]] static void load_devalloc( - taichi::lang::vulkan::VkRuntime *vulkan_runtime, + taichi::lang::gfx::GfxRuntime *vulkan_runtime, taichi::lang::DeviceAllocation &alloc, void *data, size_t size) { @@ -169,14 +169,14 @@ TEST(AotSaveLoad, Vulkan) { std::make_unique(evd_params); // Create Vulkan runtime - vulkan::VkRuntime::Params params; + gfx::GfxRuntime::Params params; params.host_result_buffer = result_buffer; params.device = embedded_device->device(); auto vulkan_runtime = - std::make_unique(std::move(params)); + std::make_unique(std::move(params)); // Run AOT module loader - vulkan::AotModuleParams mod_params; + gfx::AotModuleParams mod_params; mod_params.module_path = "."; mod_params.runtime = vulkan_runtime.get(); @@ -242,14 +242,14 @@ TEST(AotSaveLoad, VulkanNdarray) { std::make_unique(evd_params); // Create Vulkan runtime - vulkan::VkRuntime::Params params; + gfx::GfxRuntime::Params params; params.host_result_buffer = result_buffer; params.device = embedded_device->device(); auto vulkan_runtime = - std::make_unique(std::move(params)); + std::make_unique(std::move(params)); // Run AOT module loader - vulkan::AotModuleParams mod_params; + gfx::AotModuleParams mod_params; mod_params.module_path = "."; mod_params.runtime = vulkan_runtime.get(); @@ -352,14 +352,14 @@ TEST(AotLoadGraph, Vulkan) { static_cast( embedded_device->device()); // Create Vulkan runtime - vulkan::VkRuntime::Params params; + gfx::GfxRuntime::Params params; params.host_result_buffer = result_buffer; params.device = device_; auto vulkan_runtime = - std::make_unique(std::move(params)); + std::make_unique(std::move(params)); // Run AOT module loader - vulkan::AotModuleParams mod_params; + gfx::AotModuleParams mod_params; mod_params.module_path = "."; mod_params.runtime = vulkan_runtime.get(); diff --git a/tests/cpp/backends/dx11_device_test.cpp b/tests/cpp/backends/dx11_device_test.cpp index a8584170ee75b..d35b62a015585 100644 --- a/tests/cpp/backends/dx11_device_test.cpp +++ b/tests/cpp/backends/dx11_device_test.cpp @@ -127,8 +127,8 @@ TEST(Dx11ProgramTest, MaterializeRuntimeTest) { /* This test needs allocate_memory because of the call stack here: Dx11ProgramImpl::materialize_runtime - - VkRuntime::VkRuntime - - VkRuntime::init_buffers + - GfxRuntime::GfxRuntime + - GfxRuntime::init_buffers - Dx11Device::allocate_memory_unique - Dx11Device::get_compute_stream - Dx11Stream::new_command_list diff --git a/tests/python/test_ad_basics.py b/tests/python/test_ad_basics.py index a870e643a6d9f..7d4191760baaa 100644 --- a/tests/python/test_ad_basics.py +++ b/tests/python/test_ad_basics.py @@ -90,7 +90,7 @@ def test_poly(tifunc): (lambda x: ti.asin(x), lambda x: np.arcsin(x)), ]) @if_has_autograd -@test_utils.test(exclude=[ti.vulkan]) +@test_utils.test(exclude=[ti.vulkan, ti.dx11]) def test_trigonometric(tifunc, npfunc): grad_test(tifunc, npfunc) @@ -319,7 +319,7 @@ def work(): assert 'RandStmt not supported' in e.value.args[0] -@test_utils.test(exclude=[ti.cc, ti.vulkan, ti.opengl]) +@test_utils.test(exclude=[ti.cc, ti.vulkan, ti.opengl, ti.dx11]) def test_ad_frac(): @ti.func def frac(x): diff --git a/tests/python/test_clear_all_gradients.py b/tests/python/test_clear_all_gradients.py index f93e780494b4d..fbde26b3a740e 100644 --- a/tests/python/test_clear_all_gradients.py +++ b/tests/python/test_clear_all_gradients.py @@ -4,7 +4,7 @@ from tests import test_utils -@test_utils.test(exclude=[ti.vulkan]) +@test_utils.test(exclude=[ti.vulkan, ti.dx11]) def test_clear_all_gradients(): x = ti.field(ti.f32) y = ti.field(ti.f32) diff --git a/tests/python/test_element_wise.py b/tests/python/test_element_wise.py index 642d7cb606e7f..cecbccc9fb6b6 100644 --- a/tests/python/test_element_wise.py +++ b/tests/python/test_element_wise.py @@ -11,7 +11,7 @@ def _c_mod(a, b): @pytest.mark.parametrize('lhs_is_mat,rhs_is_mat', [(True, True), (True, False), (False, True)]) -@test_utils.test(fast_math=False, exclude=[ti.vulkan]) +@test_utils.test(fast_math=False, exclude=[ti.vulkan, ti.dx11]) def test_binary_f(lhs_is_mat, rhs_is_mat): x = ti.Matrix.field(3, 2, ti.f32, 16) if lhs_is_mat: diff --git a/tests/python/test_f16.py b/tests/python/test_f16.py index 2cf9728b65b7d..b27526546ad6e 100644 --- a/tests/python/test_f16.py +++ b/tests/python/test_f16.py @@ -102,7 +102,7 @@ def init(): @pytest.mark.skipif(not has_paddle(), reason='Paddle not installed.') -@test_utils.test(arch=archs_support_f16, exclude=ti.vulkan) +@test_utils.test(arch=archs_support_f16, exclude=[ti.vulkan, ti.dx11]) def test_to_paddle(): import paddle n = 16 @@ -123,7 +123,7 @@ def init(): @pytest.mark.skipif(not has_paddle(), reason='Paddle not installed.') -@test_utils.test(arch=archs_support_f16, exclude=ti.vulkan) +@test_utils.test(arch=archs_support_f16, exclude=[ti.vulkan, ti.dx11]) def test_from_paddle(): import paddle n = 16 diff --git a/tests/python/test_fields_builder.py b/tests/python/test_fields_builder.py index 590fc2986e27a..888f8773be12b 100644 --- a/tests/python/test_fields_builder.py +++ b/tests/python/test_fields_builder.py @@ -38,7 +38,7 @@ def assign_field_multiple(): assert x[i] == i -@test_utils.test(arch=[ti.cpu, ti.cuda, ti.vulkan, ti.metal]) +@test_utils.test(arch=[ti.cpu, ti.cuda, ti.vulkan, ti.dx11, ti.metal]) def test_fields_builder_dense(): shape = 5 fb1 = ti.FieldsBuilder() @@ -142,7 +142,7 @@ def assign_field_multiple_struct_for(): # See https://docs.taichi-lang.org/docs/type#primitive-types for more details. @pytest.mark.parametrize('test_1d_size', [1, 10, 100]) @pytest.mark.parametrize('field_type', [ti.f32, ti.i32]) -@test_utils.test(arch=[ti.cpu, ti.cuda, ti.vulkan, ti.metal]) +@test_utils.test(arch=[ti.cpu, ti.cuda, ti.vulkan, ti.dx11, ti.metal]) def test_fields_builder_destroy(test_1d_size, field_type): def test_for_single_destroy_multi_fields(): fb = ti.FieldsBuilder() @@ -180,7 +180,7 @@ def test_for_raise_destroy_twice(): c.destroy() -@test_utils.test(arch=[ti.cpu, ti.cuda, ti.vulkan]) +@test_utils.test(arch=[ti.cpu, ti.cuda, ti.vulkan, ti.dx11]) def test_field_initialize_zero(): fb0 = ti.FieldsBuilder() a = ti.field(ti.i32) diff --git a/tests/python/test_loop_grad.py b/tests/python/test_loop_grad.py index ab209613430a2..11482fb39384c 100644 --- a/tests/python/test_loop_grad.py +++ b/tests/python/test_loop_grad.py @@ -2,7 +2,7 @@ from tests import test_utils -@test_utils.test(exclude=[ti.vulkan]) +@test_utils.test(exclude=[ti.vulkan, ti.dx11]) def test_loop_grad(): x = ti.field(ti.f32) @@ -32,7 +32,7 @@ def func(): assert x.grad[k, i] == 2**(m - 1 - i) -@test_utils.test(exclude=[ti.vulkan]) +@test_utils.test(exclude=[ti.vulkan, ti.dx11]) def test_loop_grad_complex(): return # This case is not supported yet x = ti.field(ti.f32) diff --git a/tests/python/test_print.py b/tests/python/test_print.py index 3e62d19199378..e9f0943123755 100644 --- a/tests/python/test_print.py +++ b/tests/python/test_print.py @@ -23,7 +23,8 @@ def func(): # TODO: As described by @k-ye above, what we want to ensure # is that, the content shows on console is *correct*. -@test_utils.test(exclude=[ti.vulkan]) # TODO(changyu): enable ti.vulkan +@test_utils.test(exclude=[ti.vulkan, + ti.dx11]) # TODO(changyu): enable ti.vulkan def test_multi_print(): @ti.kernel def func(x: ti.i32, y: ti.f32): @@ -33,7 +34,8 @@ def func(x: ti.i32, y: ti.f32): ti.sync() -@test_utils.test(exclude=[ti.vulkan]) # TODO(changyu): enable ti.vulkan +@test_utils.test(exclude=[ti.vulkan, + ti.dx11]) # TODO(changyu): enable ti.vulkan def test_print_string(): @ti.kernel def func(x: ti.i32, y: ti.f32): @@ -45,7 +47,8 @@ def func(x: ti.i32, y: ti.f32): ti.sync() -@test_utils.test(exclude=[ti.vulkan]) # TODO(changyu): enable ti.vulkan +@test_utils.test(exclude=[ti.vulkan, + ti.dx11]) # TODO(changyu): enable ti.vulkan def test_print_matrix(): x = ti.Matrix.field(2, 3, dtype=ti.f32, shape=()) y = ti.Vector.field(3, dtype=ti.f32, shape=3) @@ -61,7 +64,8 @@ def func(k: ti.f32): ti.sync() -@test_utils.test(exclude=[ti.vulkan]) # TODO(changyu): enable ti.vulkan +@test_utils.test(exclude=[ti.vulkan, + ti.dx11]) # TODO(changyu): enable ti.vulkan def test_print_sep_end(): @ti.kernel def func(): @@ -81,7 +85,8 @@ def func(): ti.sync() -@test_utils.test(exclude=[ti.vulkan]) # TODO(changyu): enable ti.vulkan +@test_utils.test(exclude=[ti.vulkan, + ti.dx11]) # TODO(changyu): enable ti.vulkan def test_print_multiple_threads(): x = ti.field(dtype=ti.f32, shape=(128, )) @@ -97,7 +102,8 @@ def func(k: ti.f32): ti.sync() -@test_utils.test(exclude=[ti.vulkan]) # TODO(changyu): enable ti.vulkan +@test_utils.test(exclude=[ti.vulkan, + ti.dx11]) # TODO(changyu): enable ti.vulkan def test_print_list(): x = ti.Matrix.field(2, 3, dtype=ti.f32, shape=(2, 3)) y = ti.Vector.field(3, dtype=ti.f32, shape=()) diff --git a/tests/python/test_reduction.py b/tests/python/test_reduction.py index 88e7f93d097f5..a1e62a741d769 100644 --- a/tests/python/test_reduction.py +++ b/tests/python/test_reduction.py @@ -33,8 +33,9 @@ def _test_reduction_single(dtype, criterion, op): N = 1024 * 1024 - if (ti.lang.impl.current_cfg().arch == ti.opengl or - ti.lang.impl.current_cfg().arch == ti.vulkan) and dtype == ti.f32: + if (ti.lang.impl.current_cfg().arch == ti.opengl + or ti.lang.impl.current_cfg().arch == ti.vulkan + or ti.lang.impl.current_cfg().arch == ti.dx11) and dtype == ti.f32: # OpenGL/Vulkan are not capable of such large number in its float32... N = 1024 * 16 diff --git a/tests/python/test_struct.py b/tests/python/test_struct.py index e6f7b240f91f6..e499f41a190b7 100644 --- a/tests/python/test_struct.py +++ b/tests/python/test_struct.py @@ -61,7 +61,7 @@ def test_linear_nested_aos(): assert y[i] == i + 123 -@test_utils.test(exclude=[ti.vulkan]) +@test_utils.test(exclude=[ti.vulkan, ti.dx11]) def test_2d_nested(): x = ti.field(ti.i32) diff --git a/tests/python/test_torch_io.py b/tests/python/test_torch_io.py index 1f05bfdcb1f3e..0c96605da431a 100644 --- a/tests/python/test_torch_io.py +++ b/tests/python/test_torch_io.py @@ -11,7 +11,7 @@ @pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.') -@test_utils.test(exclude=[ti.opengl, ti.vulkan]) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11]) def test_io_devices(): n = 32 x = ti.field(dtype=ti.i32, shape=n) @@ -48,7 +48,7 @@ def store(y: ti.types.ndarray()): @pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.') -@test_utils.test(exclude=[ti.opengl, ti.vulkan]) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11]) def test_io(): n = 32 @@ -88,7 +88,7 @@ def backward(ctx, outp_grad): @pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.') -@test_utils.test(exclude=[ti.opengl, ti.vulkan]) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11]) def test_io_2d(): n = 32 @@ -112,7 +112,7 @@ def forward(ctx, inp): @pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.') -@test_utils.test(exclude=[ti.opengl, ti.vulkan]) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11]) def test_io_3d(): n = 16 @@ -138,7 +138,7 @@ def forward(ctx, inp): @pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.') -@test_utils.test(exclude=[ti.opengl, ti.vulkan]) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11]) def test_io_simple(): n = 32 @@ -165,7 +165,7 @@ def test_io_simple(): @pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.') -@test_utils.test(exclude=[ti.opengl, ti.vulkan]) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11]) def test_io_zeros(): mat = ti.Matrix.field(2, 6, dtype=ti.f32, shape=(), needs_grad=True) zeros = torch.zeros((2, 6)) @@ -179,7 +179,7 @@ def test_io_zeros(): @pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.') -@test_utils.test(exclude=[ti.opengl, ti.vulkan]) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11]) def test_io_struct(): n = 16 x1 = ti.Struct.field({"a": ti.i32, "b": ti.f32}, shape=(n, )) @@ -199,7 +199,7 @@ def test_io_struct(): @pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.') -@test_utils.test(exclude=[ti.opengl, ti.vulkan]) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11]) def test_fused_kernels(): n = 12 X = ti.Matrix.field(3, 2, ti.f32, shape=(n, n, n)) @@ -211,7 +211,7 @@ def test_fused_kernels(): @pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.') -@test_utils.test(exclude=[ti.opengl, ti.vulkan]) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11]) def test_device(): n = 12 X = ti.Matrix.field(3, 2, ti.f32, shape=(n, n, n)) @@ -222,7 +222,7 @@ def test_device(): @pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.') -@test_utils.test(exclude=[ti.opengl, ti.vulkan]) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11]) def test_shape_matrix(): n = 12 x = ti.Matrix.field(3, 2, ti.f32, shape=(n, n)) @@ -242,7 +242,7 @@ def test_shape_matrix(): @pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.') -@test_utils.test(exclude=[ti.opengl, ti.vulkan]) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11]) def test_shape_vector(): n = 12 x = ti.Vector.field(3, ti.f32, shape=(n, n)) @@ -261,7 +261,7 @@ def test_shape_vector(): @pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.') -@test_utils.test(exclude=[ti.opengl, ti.vulkan]) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11]) def test_torch_zero(): @ti.kernel def test_torch(arr: ti.types.ndarray()): diff --git a/tests/python/test_types.py b/tests/python/test_types.py index 2d1cb785136be..893112ce546c8 100644 --- a/tests/python/test_types.py +++ b/tests/python/test_types.py @@ -20,13 +20,14 @@ def func(value: dt): @pytest.mark.parametrize('dt', _TI_TYPES) -@test_utils.test(exclude=[ti.opengl, ti.vulkan]) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11]) def test_type_assign_argument(dt): _test_type_assign_argument(dt) @pytest.mark.parametrize('dt', _TI_64_TYPES) -@test_utils.test(exclude=[ti.opengl, ti.vulkan], require=ti.extension.data64) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11], + require=ti.extension.data64) def test_type_assign_argument64(dt): _test_type_assign_argument(dt) @@ -52,13 +53,14 @@ def func(): @pytest.mark.parametrize('dt', _TI_TYPES) -@test_utils.test(exclude=[ti.opengl, ti.vulkan]) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11]) def test_type_operator(dt): _test_type_operator(dt) @pytest.mark.parametrize('dt', _TI_64_TYPES) -@test_utils.test(exclude=[ti.opengl, ti.vulkan], require=ti.extension.data64) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11], + require=ti.extension.data64) def test_type_operator64(dt): _test_type_operator(dt) @@ -77,13 +79,14 @@ def func(i: ti.i32, j: ti.i32): @pytest.mark.parametrize('dt', _TI_TYPES) -@test_utils.test(exclude=[ti.opengl, ti.vulkan]) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11]) def test_type_field(dt): _test_type_field(dt) @pytest.mark.parametrize('dt', _TI_64_TYPES) -@test_utils.test(exclude=[ti.opengl, ti.vulkan], require=ti.extension.data64) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11], + require=ti.extension.data64) def test_type_field64(dt): _test_type_field(dt) @@ -119,7 +122,7 @@ def func(): (ti.i32, 32), (ti.u32, 32), ]) -@test_utils.test(exclude=[ti.opengl, ti.vulkan]) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11]) def test_overflow(dt, n): _test_overflow(dt, n) @@ -128,7 +131,8 @@ def test_overflow(dt, n): (ti.i64, 64), (ti.u64, 64), ]) -@test_utils.test(exclude=[ti.opengl, ti.vulkan], require=ti.extension.data64) +@test_utils.test(exclude=[ti.opengl, ti.vulkan, ti.dx11], + require=ti.extension.data64) def test_overflow64(dt, n): _test_overflow(dt, n) diff --git a/tests/test_utils.py b/tests/test_utils.py index 9f7d9babb0aa7..c42d230cb886a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -7,7 +7,7 @@ import pytest from taichi._lib import core as _ti_core -from taichi.lang import cc, cpu, cuda, gpu, metal, opengl, vulkan +from taichi.lang import cc, cpu, cuda, dx11, gpu, metal, opengl, vulkan from taichi.lang.misc import is_arch_supported import taichi as ti