From a9f2905b16c7581e48b0ba46dc5f7918665f7baf Mon Sep 17 00:00:00 2001 From: PENGUINLIONG Date: Thu, 15 Sep 2022 09:54:37 +0800 Subject: [PATCH 1/7] [aot] Guard C-API interfaces with try-catch (#6060) --- c_api/include/taichi/taichi_core.h | 1 + c_api/src/taichi_core_impl.cpp | 113 ++++++++++++++++++++++++----- c_api/src/taichi_core_impl.h | 14 ++++ c_api/src/taichi_opengl_impl.cpp | 2 + c_api/src/taichi_vulkan_impl.cpp | 36 +++++++-- c_api/taichi.json | 3 +- 6 files changed, 146 insertions(+), 23 deletions(-) diff --git a/c_api/include/taichi/taichi_core.h b/c_api/include/taichi/taichi_core.h index 4b23cceb33a19..69a2fefae793b 100644 --- a/c_api/include/taichi/taichi_core.h +++ b/c_api/include/taichi/taichi_core.h @@ -56,6 +56,7 @@ typedef enum TiError { TI_ERROR_ARGUMENT_OUT_OF_RANGE = -6, TI_ERROR_ARGUMENT_NOT_FOUND = -7, TI_ERROR_INVALID_INTEROP = -8, + TI_ERROR_INVALID_STATE = -9, TI_ERROR_MAX_ENUM = 0xffffffff, } TiError; diff --git a/c_api/src/taichi_core_impl.cpp b/c_api/src/taichi_core_impl.cpp index 4002af00376f1..303a8eb26450b 100644 --- a/c_api/src/taichi_core_impl.cpp +++ b/c_api/src/taichi_core_impl.cpp @@ -35,6 +35,8 @@ const char *describe_error(TiError error) { return "argument not found"; case TI_ERROR_INVALID_INTEROP: return "invalid interop"; + case TI_ERROR_INVALID_STATE: + return "invalid state"; default: return "unknown error"; } @@ -104,6 +106,8 @@ Runtime &Event::runtime() { // ----------------------------------------------------------------------------- TiError ti_get_last_error(uint64_t message_size, char *message) { + TiError out = TI_ERROR_INVALID_STATE; + TI_CAPI_TRY_CATCH_BEGIN(); // Emit message only if the output buffer is property provided. if (message_size > 0 && message != nullptr) { size_t n = thread_error_cache.message.size(); @@ -113,11 +117,14 @@ TiError ti_get_last_error(uint64_t message_size, char *message) { std::memcpy(message, thread_error_cache.message.data(), n); message[n] = '\0'; } - return thread_error_cache.error; + out = thread_error_cache.error; + TI_CAPI_TRY_CATCH_END(); + return out; } // C-API errors MUST be set via this interface. No matter from internal or // external procedures. 
void ti_set_last_error(TiError error, const char *message) { + TI_CAPI_TRY_CATCH_BEGIN(); if (error < TI_ERROR_SUCCESS) { TI_WARN("C-API error: ({}) {}", describe_error(error), message); if (message != nullptr) { @@ -130,32 +137,40 @@ void ti_set_last_error(TiError error, const char *message) { thread_error_cache.error = TI_ERROR_SUCCESS; thread_error_cache.message.clear(); } + TI_CAPI_TRY_CATCH_END(); } TiRuntime ti_create_runtime(TiArch arch) { + TiRuntime out = TI_NULL_HANDLE; + TI_CAPI_TRY_CATCH_BEGIN(); switch (arch) { #ifdef TI_WITH_VULKAN case TI_ARCH_VULKAN: { - return (TiRuntime)(static_cast(new VulkanRuntimeOwned)); + out = (TiRuntime)(static_cast(new VulkanRuntimeOwned)); + break; } #endif // TI_WITH_VULKAN #ifdef TI_WITH_OPENGL case TI_ARCH_OPENGL: { - return (TiRuntime)(static_cast(new OpenglRuntime)); + out = (TiRuntime)(static_cast(new OpenglRuntime)); + break; } #endif // TI_WITH_OPENGL #ifdef TI_WITH_LLVM case TI_ARCH_X64: { - return (TiRuntime)(static_cast( + out = (TiRuntime)(static_cast( new capi::LlvmRuntime(taichi::Arch::x64))); + break; } case TI_ARCH_ARM64: { - return (TiRuntime)(static_cast( + out = (TiRuntime)(static_cast( new capi::LlvmRuntime(taichi::Arch::arm64))); + break; } case TI_ARCH_CUDA: { - return (TiRuntime)(static_cast( + out = (TiRuntime)(static_cast( new capi::LlvmRuntime(taichi::Arch::cuda))); + break; } #endif // TI_WITH_LLVM default: { @@ -163,15 +178,20 @@ TiRuntime ti_create_runtime(TiArch arch) { return TI_NULL_HANDLE; } } - return TI_NULL_HANDLE; + TI_CAPI_TRY_CATCH_END(); + return out; } void ti_destroy_runtime(TiRuntime runtime) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); delete (Runtime *)runtime; + TI_CAPI_TRY_CATCH_END(); } TiMemory ti_allocate_memory(TiRuntime runtime, const TiMemoryAllocateInfo *create_info) { + TiMemory out = TI_NULL_HANDLE; + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL_RV(runtime); TI_CAPI_ARGUMENT_NULL_RV(create_info); @@ -196,35 +216,46 @@ TiMemory ti_allocate_memory(TiRuntime runtime, params.export_sharing = create_info->export_sharing; params.usage = usage; - TiMemory devmem = ((Runtime *)runtime)->allocate_memory(params); - return devmem; + out = ((Runtime *)runtime)->allocate_memory(params); + TI_CAPI_TRY_CATCH_END(); + return out; } void ti_free_memory(TiRuntime runtime, TiMemory devmem) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); TI_CAPI_ARGUMENT_NULL(devmem); Runtime *runtime2 = (Runtime *)runtime; runtime2->free_memory(devmem); + TI_CAPI_TRY_CATCH_END(); } void *ti_map_memory(TiRuntime runtime, TiMemory devmem) { + void *out = nullptr; + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL_RV(runtime); TI_CAPI_ARGUMENT_NULL_RV(devmem); Runtime *runtime2 = (Runtime *)runtime; - return runtime2->get().map(devmem2devalloc(*runtime2, devmem)); + out = runtime2->get().map(devmem2devalloc(*runtime2, devmem)); + TI_CAPI_TRY_CATCH_END(); + return out; } void ti_unmap_memory(TiRuntime runtime, TiMemory devmem) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); TI_CAPI_ARGUMENT_NULL(devmem); Runtime *runtime2 = (Runtime *)runtime; runtime2->get().unmap(devmem2devalloc(*runtime2, devmem)); + TI_CAPI_TRY_CATCH_END(); } TiImage ti_allocate_image(TiRuntime runtime, const TiImageAllocateInfo *allocate_info) { + TiImage out = TI_NULL_HANDLE; + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL_RV(runtime); TI_CAPI_ARGUMENT_NULL_RV(allocate_info); @@ -275,43 +306,58 @@ TiImage ti_allocate_image(TiRuntime runtime, params.export_sharing = false; params.usage = usage; - TiImage 
devimg = ((Runtime *)runtime)->allocate_image(params); - return devimg; + out = ((Runtime *)runtime)->allocate_image(params); + TI_CAPI_TRY_CATCH_END(); + return out; } void ti_free_image(TiRuntime runtime, TiImage image) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); TI_CAPI_ARGUMENT_NULL(image); ((Runtime *)runtime)->free_image(image); + TI_CAPI_TRY_CATCH_END(); } TiSampler ti_create_sampler(TiRuntime runtime, const TiSamplerCreateInfo *create_info) { + TiSampler out = TI_NULL_HANDLE; + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_NOT_SUPPORTED(ti_create_sampler); - return TI_NULL_HANDLE; + TI_CAPI_TRY_CATCH_END(); + return out; } void ti_destroy_sampler(TiRuntime runtime, TiSampler sampler) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_NOT_SUPPORTED(ti_destroy_sampler); + TI_CAPI_TRY_CATCH_END(); } TiEvent ti_create_event(TiRuntime runtime) { + TiEvent out = TI_NULL_HANDLE; + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL_RV(runtime); Runtime *runtime2 = (Runtime *)runtime; std::unique_ptr event = runtime2->get().create_event(); Event *event2 = new Event(*runtime2, std::move(event)); - return (TiEvent)event2; + out = (TiEvent)event2; + TI_CAPI_TRY_CATCH_END(); + return out; } void ti_destroy_event(TiEvent event) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(event); delete (Event *)event; + TI_CAPI_TRY_CATCH_END(); } void ti_copy_memory_device_to_device(TiRuntime runtime, const TiMemorySlice *dst_memory, const TiMemorySlice *src_memory) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); TI_CAPI_ARGUMENT_NULL(dst_memory); TI_CAPI_ARGUMENT_NULL(dst_memory->memory); @@ -325,11 +371,13 @@ void ti_copy_memory_device_to_device(TiRuntime runtime, auto src = devmem2devalloc(*runtime2, src_memory->memory) .get_ptr(src_memory->offset); runtime2->buffer_copy(dst, src, dst_memory->size); + TI_CAPI_TRY_CATCH_END(); } void ti_copy_texture_device_to_device(TiRuntime runtime, const TiImageSlice *dst_texture, const TiImageSlice *src_texture) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); TI_CAPI_ARGUMENT_NULL(dst_texture); TI_CAPI_ARGUMENT_NULL(dst_texture->image); @@ -353,10 +401,12 @@ void ti_copy_texture_device_to_device(TiRuntime runtime, params.height = dst_texture->extent.height; params.depth = dst_texture->extent.depth; runtime2->copy_image(dst, src, params); + TI_CAPI_TRY_CATCH_END(); } void ti_transition_texture(TiRuntime runtime, TiImage texture, TiImageLayout layout) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); TI_CAPI_ARGUMENT_NULL(texture); @@ -376,9 +426,12 @@ void ti_transition_texture(TiRuntime runtime, } runtime2->transition_image(image, layout2); + TI_CAPI_TRY_CATCH_END(); } TiAotModule ti_load_aot_module(TiRuntime runtime, const char *module_path) { + TiAotModule out = TI_NULL_HANDLE; + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL_RV(runtime); TI_CAPI_ARGUMENT_NULL_RV(module_path); @@ -388,15 +441,21 @@ TiAotModule ti_load_aot_module(TiRuntime runtime, const char *module_path) { ti_set_last_error(TI_ERROR_CORRUPTED_DATA, module_path); return TI_NULL_HANDLE; } - return aot_module; + out = aot_module; + TI_CAPI_TRY_CATCH_END(); + return out; } void ti_destroy_aot_module(TiAotModule aot_module) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(aot_module); delete (AotModule *)aot_module; + TI_CAPI_TRY_CATCH_END(); } TiKernel ti_get_aot_module_kernel(TiAotModule aot_module, const char *name) { + TiKernel out = TI_NULL_HANDLE; + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL_RV(aot_module); TI_CAPI_ARGUMENT_NULL_RV(name); @@ 
-408,11 +467,15 @@ TiKernel ti_get_aot_module_kernel(TiAotModule aot_module, const char *name) { return TI_NULL_HANDLE; } - return (TiKernel)kernel; + out = (TiKernel)kernel; + TI_CAPI_TRY_CATCH_END(); + return out; } TiComputeGraph ti_get_aot_module_compute_graph(TiAotModule aot_module, const char *name) { + TiComputeGraph out = TI_NULL_HANDLE; + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL_RV(aot_module); TI_CAPI_ARGUMENT_NULL_RV(name); @@ -424,13 +487,16 @@ TiComputeGraph ti_get_aot_module_compute_graph(TiAotModule aot_module, return TI_NULL_HANDLE; } - return (TiComputeGraph)cgraph; + out = (TiComputeGraph)cgraph; + TI_CAPI_TRY_CATCH_END(); + return out; } void ti_launch_kernel(TiRuntime runtime, TiKernel kernel, uint32_t arg_count, const TiArgument *args) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); TI_CAPI_ARGUMENT_NULL(kernel); if (arg_count > 0) { @@ -482,12 +548,14 @@ void ti_launch_kernel(TiRuntime runtime, } } ((taichi::lang::aot::Kernel *)kernel)->launch(&runtime_context); + TI_CAPI_TRY_CATCH_END(); } void ti_launch_compute_graph(TiRuntime runtime, TiComputeGraph compute_graph, uint32_t arg_count, const TiNamedArgument *args) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); TI_CAPI_ARGUMENT_NULL(compute_graph); if (arg_count > 0) { @@ -615,36 +683,47 @@ void ti_launch_compute_graph(TiRuntime runtime, } } ((taichi::lang::aot::CompiledGraph *)compute_graph)->run(arg_map); + TI_CAPI_TRY_CATCH_END(); } void ti_signal_event(TiRuntime runtime, TiEvent event) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); TI_CAPI_ARGUMENT_NULL(event); ((Runtime *)runtime)->signal_event(&((Event *)event)->get()); + TI_CAPI_TRY_CATCH_END(); } void ti_reset_event(TiRuntime runtime, TiEvent event) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); TI_CAPI_ARGUMENT_NULL(event); ((Runtime *)runtime)->reset_event(&((Event *)event)->get()); + TI_CAPI_TRY_CATCH_END(); } void ti_wait_event(TiRuntime runtime, TiEvent event) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); TI_CAPI_ARGUMENT_NULL(event); ((Runtime *)runtime)->wait_event(&((Event *)event)->get()); + TI_CAPI_TRY_CATCH_END(); } void ti_submit(TiRuntime runtime) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); ((Runtime *)runtime)->submit(); + TI_CAPI_TRY_CATCH_END(); } void ti_wait(TiRuntime runtime) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); ((Runtime *)runtime)->wait(); + TI_CAPI_TRY_CATCH_END(); } diff --git a/c_api/src/taichi_core_impl.h b/c_api/src/taichi_core_impl.h index bc96cc95ba61f..13967b9bfb21d 100644 --- a/c_api/src/taichi_core_impl.h +++ b/c_api/src/taichi_core_impl.h @@ -1,4 +1,5 @@ #pragma once +#include #include "taichi/taichi_core.h" #include "taichi/aot/module_loader.h" #include "taichi/rhi/device.h" @@ -54,6 +55,19 @@ return TI_NULL_HANDLE; \ } +#define TI_CAPI_TRY_CATCH_BEGIN() try { +#define TI_CAPI_TRY_CATCH_END() \ + } \ + catch (const std::exception &e) { \ + ti_set_last_error(TI_ERROR_INVALID_STATE, e.what()); \ + } \ + catch (const std::string &e) { \ + ti_set_last_error(TI_ERROR_INVALID_STATE, e.c_str()); \ + } \ + catch (...) 
{ \ + ti_set_last_error(TI_ERROR_INVALID_STATE, "c++ exception"); \ + } + class Runtime; class Context; class AotModule; diff --git a/c_api/src/taichi_opengl_impl.cpp b/c_api/src/taichi_opengl_impl.cpp index 62aee96e30180..a3087c027e97a 100644 --- a/c_api/src/taichi_opengl_impl.cpp +++ b/c_api/src/taichi_opengl_impl.cpp @@ -18,6 +18,7 @@ taichi::lang::gfx::GfxRuntime &OpenglRuntime::get_gfx_runtime() { void ti_export_opengl_memory(TiRuntime runtime, TiMemory memory, TiOpenglMemoryInteropInfo *interop_info) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); TI_CAPI_ARGUMENT_NULL(memory); TI_CAPI_ARGUMENT_NULL(interop_info); @@ -27,4 +28,5 @@ void ti_export_opengl_memory(TiRuntime runtime, taichi::lang::DeviceAllocation devalloc = devmem2devalloc(*runtime2, memory); interop_info->buffer = devalloc.alloc_id; interop_info->size = runtime2->get_gl().get_devalloc_size(devalloc); + TI_CAPI_TRY_CATCH_END(); } diff --git a/c_api/src/taichi_vulkan_impl.cpp b/c_api/src/taichi_vulkan_impl.cpp index 4081bcf5d1ad3..bf458b9601e6f 100644 --- a/c_api/src/taichi_vulkan_impl.cpp +++ b/c_api/src/taichi_vulkan_impl.cpp @@ -121,6 +121,8 @@ TiRuntime ti_create_vulkan_runtime_ext(uint32_t api_version, uint32_t instance_extension_count, const char **device_extensions, uint32_t device_extension_count) { + TiRuntime out = TI_NULL_HANDLE; + TI_CAPI_TRY_CATCH_BEGIN(); if (api_version < VK_API_VERSION_1_0) { ti_set_last_error(TI_ERROR_ARGUMENT_OUT_OF_RANGE, "api_version<1.0"); return TI_NULL_HANDLE; @@ -146,10 +148,14 @@ TiRuntime ti_create_vulkan_runtime_ext(uint32_t api_version, params.additional_device_extensions.push_back(device_extensions[i]); } params.surface_creator = nullptr; - return (TiRuntime) static_cast(new VulkanRuntimeOwned(params)); + out = (TiRuntime) static_cast(new VulkanRuntimeOwned(params)); + TI_CAPI_TRY_CATCH_END(); + return out; } TiRuntime ti_import_vulkan_runtime( const TiVulkanRuntimeInteropInfo *interop_info) { + TiRuntime out = TI_NULL_HANDLE; + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL_RV(interop_info); TI_CAPI_ARGUMENT_NULL_RV(interop_info->instance); TI_CAPI_ARGUMENT_NULL_RV(interop_info->physical_device); @@ -164,11 +170,14 @@ TiRuntime ti_import_vulkan_runtime( params.graphics_queue = interop_info->graphics_queue; params.graphics_queue_family_index = interop_info->graphics_queue_family_index; - return (TiRuntime) static_cast( + out = (TiRuntime) static_cast( new VulkanRuntimeImported(interop_info->api_version, params)); + TI_CAPI_TRY_CATCH_END(); + return out; } void ti_export_vulkan_runtime(TiRuntime runtime, TiVulkanRuntimeInteropInfo *interop_info) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); TI_CAPI_ARGUMENT_NULL(interop_info); @@ -186,11 +195,14 @@ void ti_export_vulkan_runtime(TiRuntime runtime, interop_info->graphics_queue = vk_device.graphics_queue(); interop_info->graphics_queue_family_index = vk_device.graphics_queue_family_index(); + TI_CAPI_TRY_CATCH_END(); } TiMemory ti_import_vulkan_memory( TiRuntime runtime, const TiVulkanMemoryInteropInfo *interop_info) { + TiMemory out = TI_NULL_HANDLE; + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL_RV(runtime); TI_CAPI_ARGUMENT_NULL_RV(interop_info); TI_CAPI_ARGUMENT_NULL_RV(interop_info->buffer); @@ -204,7 +216,9 @@ TiMemory ti_import_vulkan_memory( vkapi::create_buffer(vk_runtime.vk_device(), interop_info->buffer, interop_info->size, interop_info->usage); taichi::lang::DeviceAllocation devalloc = vk_runtime.import_vkbuffer(buffer); - return devalloc2devmem(*runtime2, devalloc); + out = 
devalloc2devmem(*runtime2, devalloc); + TI_CAPI_TRY_CATCH_END(); + return out; } void ti_export_vulkan_memory(TiRuntime runtime, TiMemory memory, @@ -225,6 +239,8 @@ TiImage ti_import_vulkan_image(TiRuntime runtime, const TiVulkanImageInteropInfo *interop_info, VkImageViewType view_type, VkImageLayout layout) { + TiImage out = TI_NULL_HANDLE; + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL_RV(runtime); TI_CAPI_ARGUMENT_NULL_RV(interop_info); TI_CAPI_ARGUMENT_NULL_RV(interop_info->image); @@ -268,12 +284,15 @@ TiImage ti_import_vulkan_image(TiRuntime runtime, taichi::lang::DeviceAllocation image2 = vk_runtime.import_vk_image(image, image_view, layout); - return devalloc2devimg(*runtime2, image2); + out = devalloc2devimg(*runtime2, image2); + TI_CAPI_TRY_CATCH_END(); + return out; } void ti_export_vulkan_image(TiRuntime runtime, TiImage image, TiVulkanImageInteropInfo *interop_info) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); TI_CAPI_ARGUMENT_NULL(image); TI_CAPI_ARGUMENT_NULL(interop_info); @@ -294,10 +313,13 @@ void ti_export_vulkan_image(TiRuntime runtime, interop_info->sample_count = VK_SAMPLE_COUNT_1_BIT; interop_info->tiling = VK_IMAGE_TILING_OPTIMAL; interop_info->usage = image2->usage; + TI_CAPI_TRY_CATCH_END(); } TiEvent ti_import_vulkan_event(TiRuntime runtime, const TiVulkanEventInteropInfo *interop_info) { + TiEvent out = TI_NULL_HANDLE; + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL_RV(runtime); TI_CAPI_ARGUMENT_NULL_RV(interop_info); TI_CAPI_ARGUMENT_NULL_RV(interop_info->event); @@ -313,11 +335,14 @@ TiEvent ti_import_vulkan_event(TiRuntime runtime, std::unique_ptr event2( new taichi::lang::vulkan::VulkanDeviceEvent(std::move(event))); - return (TiEvent) new Event(*runtime2, std::move(event2)); + out = (TiEvent) new Event(*runtime2, std::move(event2)); + TI_CAPI_TRY_CATCH_END(); + return out; } void ti_export_vulkan_event(TiRuntime runtime, TiEvent event, TiVulkanEventInteropInfo *interop_info) { + TI_CAPI_TRY_CATCH_BEGIN(); TI_CAPI_ARGUMENT_NULL(runtime); TI_CAPI_ARGUMENT_NULL(event); TI_CAPI_ARGUMENT_NULL(interop_info); @@ -326,4 +351,5 @@ void ti_export_vulkan_event(TiRuntime runtime, auto event2 = (taichi::lang::vulkan::VulkanDeviceEvent *)(&((Event *)event)->get()); interop_info->event = event2->vkapi_ref->event; + TI_CAPI_TRY_CATCH_END(); } diff --git a/c_api/taichi.json b/c_api/taichi.json index d352b122cdf9b..dd5fa4d73aa07 100644 --- a/c_api/taichi.json +++ b/c_api/taichi.json @@ -93,7 +93,8 @@ "argument_null": -5, "argument_out_of_range": -6, "argument_not_found": -7, - "invalid_interop": -8 + "invalid_interop": -8, + "invalid_state": -9 } }, { From e7bdbff27d832ad65d0dd0c1d35efe0f61c3c3ca Mon Sep 17 00:00:00 2001 From: Xiang Li Date: Wed, 14 Sep 2022 18:59:13 -0700 Subject: [PATCH 2/7] [dx12] Drop code for llvm passes which prepare for DXIL generation. (#5998) 2 passes are added for DXIL generation. TaichiIntrinsicLower will translate taichi intrinsic like thread_idx into the form DirectX backend expected. TaichiRuntimeContextLower will translate the TaichiRuntimeContext parameter for kernel into Buffers/ConstantBuffers. TaichiRuntimeContextLower is empty now. It is added after inline so optimizations reduce the load/store on temp ptr. And it is easier to know a store is on the TaichiRuntimeContext. 
Related issue = #5276 Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- taichi/codegen/dx12/CMakeLists.txt | 2 + .../dx12/dx12_global_optimize_module.cpp | 19 +++ taichi/codegen/dx12/dx12_llvm_passes.h | 24 ++++ taichi/codegen/dx12/dx12_lower_intrinsic.cpp | 121 ++++++++++++++++++ .../dx12/dx12_lower_runtime_context.cpp | 49 +++++++ 5 files changed, 215 insertions(+) create mode 100644 taichi/codegen/dx12/dx12_lower_intrinsic.cpp create mode 100644 taichi/codegen/dx12/dx12_lower_runtime_context.cpp diff --git a/taichi/codegen/dx12/CMakeLists.txt b/taichi/codegen/dx12/CMakeLists.txt index 24dca8d27238b..a22c3e52d91e3 100644 --- a/taichi/codegen/dx12/CMakeLists.txt +++ b/taichi/codegen/dx12/CMakeLists.txt @@ -5,6 +5,8 @@ target_sources(dx12_codegen PRIVATE codegen_dx12.cpp dx12_global_optimize_module.cpp + dx12_lower_intrinsic.cpp + dx12_lower_runtime_context.cpp ) target_include_directories(dx12_codegen diff --git a/taichi/codegen/dx12/dx12_global_optimize_module.cpp b/taichi/codegen/dx12/dx12_global_optimize_module.cpp index 94a9d7c003328..316a295d5aea6 100644 --- a/taichi/codegen/dx12/dx12_global_optimize_module.cpp +++ b/taichi/codegen/dx12/dx12_global_optimize_module.cpp @@ -8,6 +8,7 @@ #include "taichi/util/file_sequence_writer.h" #include "taichi/runtime/llvm/llvm_context.h" +#include "dx12_llvm_passes.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/Function.h" @@ -38,6 +39,8 @@ namespace taichi { namespace lang { namespace directx12 { +const char *NumWorkGroupsCBName = "num_work_groups.cbuf"; + const llvm::StringRef ShaderAttrKindStr = "hlsl.shader"; void mark_function_as_cs_entry(::llvm::Function *F) { @@ -53,6 +56,16 @@ void set_num_threads(llvm::Function *F, unsigned x, unsigned y, unsigned z) { F->addFnAttr(NumThreadsAttrKindStr, Str); } +GlobalVariable *createGlobalVariableForResource(Module &M, + const char *Name, + llvm::Type *Ty) { + auto *GV = new GlobalVariable(M, Ty, /*isConstant*/ false, + GlobalValue::LinkageTypes::ExternalLinkage, + /*Initializer*/ nullptr, Name); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None); + return GV; +} + std::vector global_optimize_module(llvm::Module *module, CompileConfig &config) { TI_AUTO_PROF @@ -104,6 +117,9 @@ std::vector global_optimize_module(llvm::Module *module, module->setDataLayout(target_machine->createDataLayout()); + // Lower taichi intrinsic first. + module_pass_manager.add(createTaichiIntrinsicLowerPass(&config)); + module_pass_manager.add(createTargetTransformInfoWrapperPass( target_machine->getTargetIRAnalysis())); function_pass_manager.add(createTargetTransformInfoWrapperPass( @@ -119,6 +135,9 @@ std::vector global_optimize_module(llvm::Module *module, b.populateFunctionPassManager(function_pass_manager); b.populateModulePassManager(module_pass_manager); + // Add passes after inline. + module_pass_manager.add(createTaichiRuntimeContextLowerPass()); + llvm::SmallString<256> str; llvm::raw_svector_ostream OS(str); // Write DXIL container to OS. 
diff --git a/taichi/codegen/dx12/dx12_llvm_passes.h b/taichi/codegen/dx12/dx12_llvm_passes.h index c07896abba1a3..2821cead303d3 100644 --- a/taichi/codegen/dx12/dx12_llvm_passes.h +++ b/taichi/codegen/dx12/dx12_llvm_passes.h @@ -7,6 +7,8 @@ namespace llvm { class Function; class Module; +class Type; +class GlobalVariable; } // namespace llvm namespace taichi { @@ -18,6 +20,9 @@ namespace directx12 { void mark_function_as_cs_entry(llvm::Function *); bool is_cs_entry(llvm::Function *); void set_num_threads(llvm::Function *, unsigned x, unsigned y, unsigned z); +llvm::GlobalVariable *createGlobalVariableForResource(llvm::Module &M, + const char *Name, + llvm::Type *Ty); std::vector global_optimize_module(llvm::Module *module, CompileConfig &config); @@ -27,3 +32,22 @@ extern const char *NumWorkGroupsCBName; } // namespace directx12 } // namespace lang } // namespace taichi + +namespace llvm { +class ModulePass; +class PassRegistry; +class Function; + +/// Initializer for DXIL-prepare +void initializeTaichiRuntimeContextLowerPass(PassRegistry &); + +/// Pass to convert modules into DXIL-compatable modules +ModulePass *createTaichiRuntimeContextLowerPass(); + +/// Initializer for taichi intrinsic lower. +void initializeTaichiIntrinsicLowerPass(PassRegistry &); + +/// Pass to lower taichi intrinsic into DXIL intrinsic. +ModulePass *createTaichiIntrinsicLowerPass(taichi::lang::CompileConfig *config); + +} // namespace llvm diff --git a/taichi/codegen/dx12/dx12_lower_intrinsic.cpp b/taichi/codegen/dx12/dx12_lower_intrinsic.cpp new file mode 100644 index 0000000000000..2a694ca04af49 --- /dev/null +++ b/taichi/codegen/dx12/dx12_lower_intrinsic.cpp @@ -0,0 +1,121 @@ + +#include "dx12_llvm_passes.h" +#include "llvm/Pass.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Instructions.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/IntrinsicsDirectX.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +#include "taichi/program/compile_config.h" +#include "taichi/runtime/llvm/llvm_context.h" + +using namespace llvm; +using namespace taichi::lang::directx12; + +#define DEBUG_TYPE "dxil-taichi-intrinsic-lower" + +namespace { + +class TaichiIntrinsicLower : public ModulePass { + public: + bool runOnModule(Module &M) override { + auto &Ctx = M.getContext(); + // patch intrinsic + auto patch_intrinsic = [&](std::string name, Intrinsic::ID intrin, + bool ret = true, + std::vector types = {}, + std::vector extra_args = {}) { + auto func = M.getFunction(name); + if (!func) { + return; + } + func->deleteBody(); + auto bb = llvm::BasicBlock::Create(Ctx, "entry", func); + IRBuilder<> builder(Ctx); + builder.SetInsertPoint(bb); + std::vector args; + for (auto &arg : func->args()) + args.push_back(&arg); + args.insert(args.end(), extra_args.begin(), extra_args.end()); + if (ret) { + builder.CreateRet(builder.CreateIntrinsic(intrin, types, args)); + } else { + builder.CreateIntrinsic(intrin, types, args); + builder.CreateRetVoid(); + } + func->setLinkage(GlobalValue::LinkageTypes::InternalLinkage); + taichi::lang::TaichiLLVMContext::mark_inline(func); + }; + + llvm::IRBuilder<> B(Ctx); + Value *i32Zero = B.getInt32(0); + + auto patch_intrinsic_to_const = [&](std::string name, Constant *C, + Type *Ty) { + auto func = M.getFunction(name); + if (!func) { + return; + } + func->deleteBody(); + auto bb = llvm::BasicBlock::Create(Ctx, "entry", func); + IRBuilder<> B(Ctx); + B.SetInsertPoint(bb); + Value *V = C; + if (V->getType()->isPointerTy()) + V = 
B.CreateLoad(Ty, C); + B.CreateRet(V); + func->setLinkage(GlobalValue::LinkageTypes::InternalLinkage); + taichi::lang::TaichiLLVMContext::mark_inline(func); + }; + // group thread id. + patch_intrinsic("thread_idx", Intrinsic::dx_thread_id_in_group, true, {}, + {i32Zero}); + // group idx. + patch_intrinsic("block_idx", Intrinsic::dx_group_id, true, {}, {i32Zero}); + // Group Size + unsigned group_size = 64; + if (config) + group_size = config->default_gpu_block_dim; + + auto *I32Ty = B.getInt32Ty(); + Constant *block_dim = B.getInt32(group_size); + patch_intrinsic_to_const("block_dim", block_dim, I32Ty); + // Num work groups will be in a special CBuffer. + // TaichiRuntimeContextLower pass will place the CBuffer to special binding + // space. + Type *TyNumWorkGroups = FixedVectorType::get(I32Ty, 3); + Constant *CBNumWorkGroups = createGlobalVariableForResource( + M, NumWorkGroupsCBName, TyNumWorkGroups); + + Constant *NumWorkGroupX = cast( + B.CreateConstGEP2_32(TyNumWorkGroups, CBNumWorkGroups, 0, 0)); + patch_intrinsic_to_const("grid_dim", NumWorkGroupX, I32Ty); + return true; + } + + TaichiIntrinsicLower(taichi::lang::CompileConfig *config = nullptr) + : ModulePass(ID), config(config) { + initializeTaichiIntrinsicLowerPass(*PassRegistry::getPassRegistry()); + } + + static char ID; // Pass identification. + private: + taichi::lang::CompileConfig *config; +}; +char TaichiIntrinsicLower::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(TaichiIntrinsicLower, + DEBUG_TYPE, + "Lower taichi intrinsic", + false, + false) + +llvm::ModulePass *llvm::createTaichiIntrinsicLowerPass( + taichi::lang::CompileConfig *config) { + return new TaichiIntrinsicLower(config); +} diff --git a/taichi/codegen/dx12/dx12_lower_runtime_context.cpp b/taichi/codegen/dx12/dx12_lower_runtime_context.cpp new file mode 100644 index 0000000000000..26884557e2659 --- /dev/null +++ b/taichi/codegen/dx12/dx12_lower_runtime_context.cpp @@ -0,0 +1,49 @@ + + +#include "dx12_llvm_passes.h" + +#include "llvm/Pass.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Instructions.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +#include "taichi/program/compile_config.h" +#include "taichi/runtime/llvm/llvm_context.h" + +using namespace llvm; +using namespace taichi::lang::directx12; + +#define DEBUG_TYPE "dxil-taichi-runtime-context-lower" + +namespace { + +class TaichiRuntimeContextLower : public ModulePass { + public: + bool runOnModule(Module &M) override { + // TODO: lower taichi RuntimeContext into DXIL resources. + return true; + } + + TaichiRuntimeContextLower() : ModulePass(ID) { + initializeTaichiRuntimeContextLowerPass(*PassRegistry::getPassRegistry()); + } + + static char ID; // Pass identification. + private: +}; +char TaichiRuntimeContextLower::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(TaichiRuntimeContextLower, + DEBUG_TYPE, + "Lower taichi RuntimeContext", + false, + false) + +llvm::ModulePass *llvm::createTaichiRuntimeContextLowerPass() { + return new TaichiRuntimeContextLower(); +} From 9d27b94ec04943528c4028da4f50fa6dfc25d000 Mon Sep 17 00:00:00 2001 From: Yi Xu Date: Thu, 15 Sep 2022 10:36:52 +0800 Subject: [PATCH 3/7] [ir] MatrixField refactor 3/n: Add MatrixFieldExpression (#6010) Related issue = #5959 This PR adds definition of `MatrixFieldExpression`, which serves as the representation of a matrix field in C++. Currently only `dynamic_index_stride` has been completely moved there. 
Others will get migrated in future PRs. Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- python/taichi/lang/matrix.py | 14 ++++++++++---- python/taichi/lang/mesh.py | 3 ++- taichi/analysis/gen_offline_cache_key.cpp | 7 +++++++ taichi/inc/expressions.inc.h | 1 + taichi/ir/expr.cpp | 6 ++++++ taichi/ir/expr.h | 4 ++++ taichi/ir/expression.h | 5 +---- taichi/ir/expression_printer.h | 11 +++++++++++ taichi/ir/frontend_ir.h | 18 ++++++++++++++++++ taichi/python/export_lang.cpp | 16 ++++++++++++++++ tests/python/test_matrix.py | 18 +++++++++--------- 11 files changed, 85 insertions(+), 18 deletions(-) diff --git a/python/taichi/lang/matrix.py b/python/taichi/lang/matrix.py index 8dffe6f0634dc..3fcddd7dfe703 100644 --- a/python/taichi/lang/matrix.py +++ b/python/taichi/lang/matrix.py @@ -1538,7 +1538,7 @@ def __init__(self, field, indices): for e in field._get_field_members() ], ndim=getattr(field, "ndim", 2)) - self._impl.dynamic_index_stride = field.dynamic_index_stride + self._impl.dynamic_index_stride = field._get_dynamic_index_stride() class MatrixField(Field): @@ -1557,7 +1557,8 @@ def __init__(self, _vars, n, m, ndim=2): self.n = n self.m = m self.ndim = ndim - self.dynamic_index_stride = None + self.ptr = ti_python_core.expr_matrix_field( + [var.ptr for var in self.vars], [n, m][:ndim]) def get_scalar_field(self, *indices): """Creates a ScalarField using a specific field member. @@ -1573,12 +1574,17 @@ def get_scalar_field(self, *indices): j = 0 if len(indices) == 1 else indices[1] return ScalarField(self.vars[i * self.m + j]) + def _get_dynamic_index_stride(self): + if self.ptr.get_dynamic_indexable(): + return self.ptr.get_dynamic_index_stride() + return None + def _calc_dynamic_index_stride(self): # Algorithm: https://github.com/taichi-dev/taichi/issues/3810 paths = [ScalarField(var).snode._path_from_root() for var in self.vars] num_members = len(paths) if num_members == 1: - self.dynamic_index_stride = 0 + self.ptr.set_dynamic_index_stride(0) return length = len(paths[0]) if any( @@ -1602,7 +1608,7 @@ def _calc_dynamic_index_stride(self): if stride != paths[i][depth_below_lca]._offset_bytes_in_parent_cell \ - paths[i - 1][depth_below_lca]._offset_bytes_in_parent_cell: return - self.dynamic_index_stride = stride + self.ptr.set_dynamic_index_stride(stride) def fill(self, val): """Fills this matrix field with specified values. 
diff --git a/python/taichi/lang/mesh.py b/python/taichi/lang/mesh.py index 2005d217ccff3..db0e23c629c4f 100644 --- a/python/taichi/lang/mesh.py +++ b/python/taichi/lang/mesh.py @@ -67,7 +67,8 @@ def __init__(self, field: MatrixField, mesh_ptr: _ti_core.MeshPtr, self.grad = field.grad self.n = field.n self.m = field.m - self.dynamic_index_stride = field.dynamic_index_stride + self.ndim = field.ndim + self.ptr = field.ptr self.mesh_ptr = mesh_ptr self.element_type = element_type diff --git a/taichi/analysis/gen_offline_cache_key.cpp b/taichi/analysis/gen_offline_cache_key.cpp index eccfd0049a261..5c1dfbef622f6 100644 --- a/taichi/analysis/gen_offline_cache_key.cpp +++ b/taichi/analysis/gen_offline_cache_key.cpp @@ -160,6 +160,13 @@ class ASTSerializer : public IRVisitor, public ExpressionVisitor { emit(expr->adjoint_checkbit); } + void visit(MatrixFieldExpression *expr) override { + emit(ExprOpCode::MatrixFieldExpression); + emit(expr->fields); + emit(expr->element_shape); + emit(expr->dynamic_index_stride); + } + void visit(IndexExpression *expr) override { emit(ExprOpCode::IndexExpression); emit(expr->var); diff --git a/taichi/inc/expressions.inc.h b/taichi/inc/expressions.inc.h index 0ae81430f83ae..b311e6fb5a441 100644 --- a/taichi/inc/expressions.inc.h +++ b/taichi/inc/expressions.inc.h @@ -6,6 +6,7 @@ PER_EXPRESSION(TernaryOpExpression) PER_EXPRESSION(InternalFuncCallExpression) PER_EXPRESSION(ExternalTensorExpression) PER_EXPRESSION(FieldExpression) +PER_EXPRESSION(MatrixFieldExpression) PER_EXPRESSION(IndexExpression) PER_EXPRESSION(MatrixExpression) PER_EXPRESSION(StrideExpression) diff --git a/taichi/ir/expr.cpp b/taichi/ir/expr.cpp index e36a1a50ad46a..3955bd752e1ad 100644 --- a/taichi/ir/expr.cpp +++ b/taichi/ir/expr.cpp @@ -119,4 +119,10 @@ Expr expr_field(Expr id_expr, DataType dt) { std::make_shared(dt, id_expr.cast()->id)); return ret; } + +Expr expr_matrix_field(const std::vector &fields, + const std::vector &element_shape) { + return Expr::make(fields, element_shape); +} + TLANG_NAMESPACE_END diff --git a/taichi/ir/expr.h b/taichi/ir/expr.h index 7bc4d8f3a8ce9..ba84faecd09a9 100644 --- a/taichi/ir/expr.h +++ b/taichi/ir/expr.h @@ -145,4 +145,8 @@ Expr assume_range(const Expr &expr, const Expr &base, int low, int high); Expr loop_unique(const Expr &input, const std::vector &covers); Expr expr_field(Expr id_expr, DataType dt); + +Expr expr_matrix_field(const std::vector &fields, + const std::vector &element_shape); + TLANG_NAMESPACE_END diff --git a/taichi/ir/expression.h b/taichi/ir/expression.h index e4d9ed08f6ad9..1f400e5b24374 100644 --- a/taichi/ir/expression.h +++ b/taichi/ir/expression.h @@ -39,10 +39,7 @@ class Expression { stmt = nullptr; } - virtual void type_check(CompileConfig *config) { - // TODO: make it pure virtual after type_check for all expressions are - // implemented - } + virtual void type_check(CompileConfig *config) = 0; virtual void accept(ExpressionVisitor *visitor) = 0; diff --git a/taichi/ir/expression_printer.h b/taichi/ir/expression_printer.h index 3f4520d443d81..c0f595efc0f95 100644 --- a/taichi/ir/expression_printer.h +++ b/taichi/ir/expression_printer.h @@ -110,6 +110,17 @@ class ExpressionHumanFriendlyPrinter : public ExpressionPrinter { } } + void visit(MatrixFieldExpression *expr) override { + emit('['); + emit_vector(expr->fields); + emit("] ("); + emit_vector(expr->element_shape); + if (expr->dynamic_index_stride) { + emit(", dynamic_index_stride = ", expr->dynamic_index_stride); + } + emit(')'); + } + void visit(MatrixExpression *expr) 
override { emit('['); emit_vector(expr->elements); diff --git a/taichi/ir/frontend_ir.h b/taichi/ir/frontend_ir.h index fddad4040d22c..cadf3663b78fb 100644 --- a/taichi/ir/frontend_ir.h +++ b/taichi/ir/frontend_ir.h @@ -521,6 +521,24 @@ class FieldExpression : public Expression { TI_DEFINE_ACCEPT_FOR_EXPRESSION }; +class MatrixFieldExpression : public Expression { + public: + std::vector fields; + std::vector element_shape; + bool dynamic_indexable{false}; + int dynamic_index_stride{0}; + + MatrixFieldExpression(const std::vector &fields, + const std::vector &element_shape) + : fields(fields), element_shape(element_shape) { + } + + void type_check(CompileConfig *config) override { + } + + TI_DEFINE_ACCEPT_FOR_EXPRESSION +}; + /** * Creating a local matrix; * lowered from ti.Matrix with real_matrix=True diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index ee32dba9ca17c..c3d58b362e52e 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -754,6 +754,20 @@ void export_lang(py::module &m) { .def("set_adjoint", &Expr::set_adjoint) .def("set_adjoint_checkbit", &Expr::set_adjoint_checkbit) .def("set_dual", &Expr::set_dual) + .def("set_dynamic_index_stride", + [&](Expr *expr, int dynamic_index_stride) { + auto matrix_field = expr->cast(); + matrix_field->dynamic_indexable = true; + matrix_field->dynamic_index_stride = dynamic_index_stride; + }) + .def("get_dynamic_indexable", + [&](Expr *expr) -> bool { + return expr->cast()->dynamic_indexable; + }) + .def("get_dynamic_index_stride", + [&](Expr *expr) -> int { + return expr->cast()->dynamic_index_stride; + }) .def( "get_dt", [&](Expr *expr) -> const Type * { @@ -838,6 +852,8 @@ void export_lang(py::module &m) { m.def("expr_field", expr_field); + m.def("expr_matrix_field", expr_matrix_field); + #define DEFINE_EXPRESSION_OP(x) m.def("expr_" #x, expr_##x); DEFINE_EXPRESSION_OP(neg) diff --git a/tests/python/test_matrix.py b/tests/python/test_matrix.py index 6069261419b74..1ecd1b6591117 100644 --- a/tests/python/test_matrix.py +++ b/tests/python/test_matrix.py @@ -402,10 +402,10 @@ def test_matrix_field_dynamic_index_stride(): @ti.kernel def check_stride(): for i in range(128): - assert ti.get_addr(y, i) - ti.get_addr(x, - i) == v.dynamic_index_stride - assert ti.get_addr(z, i) - ti.get_addr(y, - i) == v.dynamic_index_stride + assert ti.get_addr(y, i) - ti.get_addr( + x, i) == v._get_dynamic_index_stride() + assert ti.get_addr(z, i) - ti.get_addr( + y, i) == v._get_dynamic_index_stride() check_stride() @@ -431,7 +431,7 @@ def test_matrix_field_dynamic_index_different_path_length(): ti.root.dense(ti.i, 2).dense(ti.i, 4).place(y) impl.get_runtime().materialize() - assert v.dynamic_index_stride is None + assert v._get_dynamic_index_stride() is None @test_utils.test(arch=[ti.cpu, ti.cuda]) @@ -444,7 +444,7 @@ def test_matrix_field_dynamic_index_not_pure_dense(): ti.root.dense(ti.i, 2).dense(ti.i, 4).place(y) impl.get_runtime().materialize() - assert v.dynamic_index_stride is None + assert v._get_dynamic_index_stride() is None @test_utils.test(arch=[ti.cpu, ti.cuda]) @@ -459,7 +459,7 @@ def test_matrix_field_dynamic_index_different_cell_size_bytes(): ti.root.dense(ti.i, 8).place(y) impl.get_runtime().materialize() - assert v.dynamic_index_stride is None + assert v._get_dynamic_index_stride() is None @test_utils.test(arch=[ti.cpu, ti.cuda]) @@ -475,7 +475,7 @@ def test_matrix_field_dynamic_index_different_offset_bytes_in_parent_cell(): ti.root.dense(ti.i, 8).place(y, temp_b) 
impl.get_runtime().materialize() - assert v.dynamic_index_stride is None + assert v._get_dynamic_index_stride() is None @test_utils.test(arch=[ti.cpu, ti.cuda]) @@ -490,7 +490,7 @@ def test_matrix_field_dynamic_index_different_stride(): ti.root.dense(ti.i, 8).place(x, y, temp, z) impl.get_runtime().materialize() - assert v.dynamic_index_stride is None + assert v._get_dynamic_index_stride() is None @test_utils.test(arch=[ti.cpu, ti.cuda], dynamic_index=True) From 9a35e049e891a4642e237f39eb70b546ba1083bb Mon Sep 17 00:00:00 2001 From: Mike He Date: Wed, 14 Sep 2022 22:53:17 -0400 Subject: [PATCH 4/7] [Lang] Fixes matrix-vector multiplication (#6014) Related issue = fix #5988 --- python/taichi/lang/matrix.py | 12 ++++++++++++ tests/python/test_matrix.py | 20 +++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/python/taichi/lang/matrix.py b/python/taichi/lang/matrix.py index 3fcddd7dfe703..48a6013e37a9e 100644 --- a/python/taichi/lang/matrix.py +++ b/python/taichi/lang/matrix.py @@ -447,6 +447,11 @@ def __init__(self, is_matrix = isinstance(arr[0], Iterable) and not is_vector(self) initializer = _make_entries_initializer(is_matrix) self.ndim = 2 if is_matrix else 1 + if not is_matrix and isinstance(arr[0], Iterable): + flattened = [] + for row in arr: + flattened += row + arr = flattened if in_python_scope() or is_ref: mat = initializer.pyscope_or_ref(arr) @@ -553,6 +558,11 @@ def __matmul__(self, other): """ assert isinstance(other, Matrix), "rhs of `@` is not a matrix / vector" + if is_vector(self) and not is_vector(other): + # left multiplication + assert self.n == other.m, f"Dimension mismatch between shapes ({self.n}, {self.m}), ({other.n}, {other.m})" + return other.transpose() @ self + # right multiplication assert self.m == other.n, f"Dimension mismatch between shapes ({self.n}, {self.m}), ({other.n}, {other.m})" entries = [] for i in range(self.n): @@ -562,6 +572,8 @@ def __matmul__(self, other): for k in range(1, other.n): acc = acc + self(i, k) * other(k, j) entries[i].append(acc) + if is_vector(other) and other.m == 1: + return Vector(entries) return Matrix(entries) # host access & python scope operation diff --git a/tests/python/test_matrix.py b/tests/python/test_matrix.py index 1ecd1b6591117..4e3ad8d688319 100644 --- a/tests/python/test_matrix.py +++ b/tests/python/test_matrix.py @@ -115,7 +115,7 @@ def func(t: ti.i32): m += ti.Matrix([[3, 4], [5, t]]) print(m @ v) print(r.x, r.y, r.z, r.w) - s = w.transpose() @ m + s = w @ m print(s) print(m) @@ -701,6 +701,24 @@ def bar(): bar() +@test_utils.test(arch=get_host_arch_list(), debug=True) +def test_matrix_vector_multiplication(): + mat = ti.math.mat3(1) + vec = ti.math.vec3(3) + r = mat @ vec + for i in range(3): + assert r[i] == 9 + + @ti.kernel + def foo(): + mat = ti.math.mat3(1) + vec = ti.math.vec3(3) + r = mat @ vec + assert r[0] == r[1] == r[2] == 9 + + foo() + + @test_utils.test(arch=[ti.cuda, ti.cpu], real_matrix=True) def test_local_matrix_read(): From 72804ae992212ddee5357bae8b27d6e3f2148831 Mon Sep 17 00:00:00 2001 From: Chang Yu Date: Thu, 15 Sep 2022 14:22:28 +0800 Subject: [PATCH 5/7] [Mesh] [bug] Fix nested mesh for (#6062) Related issue = #3608 --- python/taichi/lang/ast/ast_transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/taichi/lang/ast/ast_transformer.py b/python/taichi/lang/ast/ast_transformer.py index 6fac2c6418e0c..07600d02bfe32 100644 --- a/python/taichi/lang/ast/ast_transformer.py +++ b/python/taichi/lang/ast/ast_transformer.py @@ -1110,7 
+1110,7 @@ def build_nested_mesh_for(ctx, node): loop_var = expr.Expr(ctx.ast_builder.make_id_expr('')) ctx.create_variable(loop_name, loop_var) begin = expr.Expr(0) - end = node.iter.ptr.size + end = ti_ops.cast(node.iter.ptr.size, primitive_types.i32) ctx.ast_builder.begin_frontend_range_for(loop_var.ptr, begin.ptr, end.ptr) entry_expr = _ti_core.get_relation_access( From 574b05e70f31e0ae5bc314d1fa7c2fe8991643b8 Mon Sep 17 00:00:00 2001 From: Proton Date: Thu, 15 Sep 2022 18:17:52 +0800 Subject: [PATCH 6/7] [ci] Upgrade conda cudatoolkit version to 11.3 (#6070) Related issue = # Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 4 ++-- .github/workflows/testing.yml | 6 +++--- ci/Dockerfile.ubuntu.18.04 | 18 +++++++++--------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ba6b5128dfa31..0f2cbc90a7640 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -87,7 +87,7 @@ jobs: mkdir -m777 shared docker create --user dev --name taichi_build --gpus all -v /tmp/.X11-unix:/tmp/.X11-unix \ -e DISPLAY -e PY -e GPU_BUILD -e TAICHI_CMAKE_ARGS -e PROJECT_NAME \ - registry.taichigraphics.com/taichidev-ubuntu18.04:v0.3.2 \ + registry.taichigraphics.com/taichidev-ubuntu18.04:v0.3.4 \ /home/dev/taichi/.github/workflows/scripts/unix_build.sh tar -cf - ../${{ github.event.repository.name }} --mode u=+rwx,g=+rwx,o=+rwx --owner 1000 --group 1000 | docker cp - taichi_build:/home/dev/ docker start -a taichi_build @@ -111,7 +111,7 @@ jobs: run: | docker create --user dev --name taichi_test --gpus all -v /tmp/.X11-unix:/tmp/.X11-unix \ -e DISPLAY -e PY -e GPU_TEST \ - registry.taichigraphics.com/taichidev-ubuntu18.04:v0.3.2 \ + registry.taichigraphics.com/taichidev-ubuntu18.04:v0.3.4 \ /home/dev/unix_test.sh docker cp .github/workflows/scripts/unix_test.sh taichi_test:/home/dev/unix_test.sh docker cp .github/workflows/scripts/common-utils.sh taichi_test:/home/dev/common-utils.sh diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 95262572addf9..4aa46e4944852 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -405,7 +405,7 @@ jobs: --gpus 'all,"capabilities=graphics,utility,display,video,compute"' \ -v /tmp/.X11-unix:/tmp/.X11-unix \ -e PY -e GPU_BUILD -e PROJECT_NAME -e TAICHI_CMAKE_ARGS -e DISPLAY \ - registry.taichigraphics.com/taichidev-ubuntu18.04:v0.3.3 \ + registry.taichigraphics.com/taichidev-ubuntu18.04:v0.3.4 \ /home/dev/taichi/.github/workflows/scripts/unix_build.sh # A tarball is needed because sccache needs some permissions that only the file owner has. # 1000 is the uid and gid of user "dev" in the container. 
@@ -444,7 +444,7 @@ jobs: -e TI_LITE_TEST \ -e TI_TEST_OFFLINE_CACHE \ -e DISPLAY -e PY -e GPU_TEST -e TI_WANTED_ARCHS -e TI_RUN_RELEASE_TESTS \ - registry.taichigraphics.com/taichidev-ubuntu18.04:v0.3.3 \ + registry.taichigraphics.com/taichidev-ubuntu18.04:v0.3.4 \ /home/dev/unix_test.sh docker cp .github/workflows/scripts/unix_test.sh taichi_test:/home/dev/unix_test.sh docker cp .github/workflows/scripts/common-utils.sh taichi_test:/home/dev/common-utils.sh @@ -671,7 +671,7 @@ jobs: docker run --user dev --name taichi_build_host \ $DOCKER_RUN_ARGS \ -v $TAICHI_WHEEL_DIR:/home/dev/taichi/dist \ - registry.taichigraphics.com/taichidev-ubuntu18.04:v0.3.3 \ + registry.taichigraphics.com/taichidev-ubuntu18.04:v0.3.4 \ /home/dev/taichi/.github/workflows/scripts/unix-build-v2.sh env: TAICHI_CMAKE_ARGS: >- diff --git a/ci/Dockerfile.ubuntu.18.04 b/ci/Dockerfile.ubuntu.18.04 index 07aef59d22d6d..f991f0390461a 100644 --- a/ci/Dockerfile.ubuntu.18.04 +++ b/ci/Dockerfile.ubuntu.18.04 @@ -93,17 +93,17 @@ RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh & bash Miniconda3-latest-Linux-x86_64.sh -p /home/dev/miniconda -b ENV PATH="/home/dev/miniconda/bin:$PATH" -# Set up multi-python environment -RUN conda init bash -RUN conda create -n py36 python=3.6 pytorch cudatoolkit=10.2 -c pytorch -y -RUN conda create -n py37 python=3.7 pytorch cudatoolkit=10.2 -c pytorch -y -RUN conda create -n py38 python=3.8 pytorch cudatoolkit=10.2 -c pytorch -y -RUN conda create -n py39 python=3.9 pytorch cudatoolkit=10.2 -c pytorch -y -# TODO add torch to 3.10 when supported -RUN conda create -n py310 python=3.10 -y - # Remove mesa EGL driver, which interferes with the propritary NVIDIA drivers RUN rm -f /usr/lib/x86_64-linux-gnu/libEGL_mesa* WORKDIR /home/dev ENV LANG="C.UTF-8" + +# Set up multi-python environment +RUN conda init bash +RUN conda create -n py36 python=3.6 pytorch cudatoolkit=11.3 -c pytorch -y +RUN conda create -n py37 python=3.7 pytorch cudatoolkit=11.3 -c pytorch -y +RUN conda create -n py38 python=3.8 pytorch cudatoolkit=11.3 -c pytorch -y +RUN conda create -n py39 python=3.9 pytorch cudatoolkit=11.3 -c pytorch -y +# TODO add torch to 3.10 when supported +RUN conda create -n py310 python=3.10 -y From 4d94b31aed9758a167cf7918a5b04f3b8884d08d Mon Sep 17 00:00:00 2001 From: Xiang Li Date: Thu, 15 Sep 2022 03:43:01 -0700 Subject: [PATCH 7/7] [ci] [dx12] Enable dx12 build for windows cpu ci. (#6069) Fix build fail and enable dx12 build for windows cpu ci to make sure it compiles. 
Related issue = #5276 --- .github/workflows/scripts/win_build_test_cpu.ps1 | 1 + taichi/codegen/dx12/codegen_dx12.cpp | 4 ++-- taichi/codegen/dx12/codegen_dx12.h | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/scripts/win_build_test_cpu.ps1 b/.github/workflows/scripts/win_build_test_cpu.ps1 index 0b1a4cd5e7faa..4aa90582517ed 100644 --- a/.github/workflows/scripts/win_build_test_cpu.ps1 +++ b/.github/workflows/scripts/win_build_test_cpu.ps1 @@ -66,6 +66,7 @@ if (!$llvmVer.CompareTo("10")) { } else { $env:TAICHI_CMAKE_ARGS += " -DLLVM_AS_EXECUTABLE=C:\\taichi_llvm_15\\bin\\llvm-as.exe -DTI_WITH_VULKAN:BOOL=OFF" $env:TAICHI_CMAKE_ARGS += " -DTI_LLVM_15:BOOL=ON" + $env:TAICHI_CMAKE_ARGS += " -DTI_WITH_DX12:BOOL=ON" } diff --git a/taichi/codegen/dx12/codegen_dx12.cpp b/taichi/codegen/dx12/codegen_dx12.cpp index 4be95a53f7b1b..bee1d3c27a6e8 100644 --- a/taichi/codegen/dx12/codegen_dx12.cpp +++ b/taichi/codegen/dx12/codegen_dx12.cpp @@ -227,7 +227,7 @@ class TaskCodeGenLLVMDX12 : public TaskCodeGenLLVM { #ifdef TI_WITH_LLVM static std::vector generate_dxil_from_llvm( - LLVMCompiledData &compiled_data, + LLVMCompiledTask &compiled_data, taichi::lang::Kernel *kernel) { // generate dxil from llvm ir. auto offloaded_local = compiled_data.tasks; @@ -286,7 +286,7 @@ KernelCodeGenDX12::CompileResult KernelCodeGenDX12::compile() { return Result; } -LLVMCompiledData KernelCodeGenDX12::compile_task( +LLVMCompiledTask KernelCodeGenDX12::compile_task( std::unique_ptr &&module, OffloadedStmt *stmt) { TaskCodeGenLLVMDX12 gen(kernel, stmt); diff --git a/taichi/codegen/dx12/codegen_dx12.h b/taichi/codegen/dx12/codegen_dx12.h index 1b9e920e71873..5d352231a1e6b 100644 --- a/taichi/codegen/dx12/codegen_dx12.h +++ b/taichi/codegen/dx12/codegen_dx12.h @@ -22,7 +22,7 @@ class KernelCodeGenDX12 : public KernelCodeGen { }; CompileResult compile(); #ifdef TI_WITH_LLVM - LLVMCompiledData compile_task( + LLVMCompiledTask compile_task( std::unique_ptr &&module = nullptr, OffloadedStmt *stmt = nullptr) override; #endif