sync with master #16

Merged: 6 commits, Feb 3, 2023
4 changes: 2 additions & 2 deletions .github/workflows/scripts/aot-demo.sh
@@ -5,8 +5,8 @@ export TI_SKIP_VERSION_CHECK=ON
export TI_CI=1

# IF YOU PIN THIS TO A COMMIT/BRANCH, YOU'RE RESPONSIBLE TO REVERT IT BACK TO MASTER ONCE MERGED.
export TAICHI_AOT_DEMO_URL=https://github.com/PENGUINLIONG/taichi-aot-demo
export TAICHI_AOT_DEMO_BRANCH=opengl-demo-alt
export TAICHI_AOT_DEMO_URL=https://github.com/taichi-dev/taichi-aot-demo
export TAICHI_AOT_DEMO_BRANCH=master
export TAICHI_UNITY2_URL=https://github.com/taichi-dev/taichi-unity2
export TAICHI_UNITY2_BRANCH=main
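
These exports point the AOT demo checkout back at the upstream repository and branch, undoing a temporary pin to a fork (per the comment above, pins must be reverted once the companion PR merges). A sketch of how a checkout step might consume them — the actual clone command lives elsewhere in the CI scripts, so this is an assumption:

    git clone --depth=1 --branch "$TAICHI_AOT_DEMO_BRANCH" "$TAICHI_AOT_DEMO_URL" taichi-aot-demo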

7 changes: 5 additions & 2 deletions misc/save_new_version.py
@@ -28,11 +28,15 @@
json=payload,
auth=(username, password),
timeout=5)
r = response.json()
print(r)
response.raise_for_status()
except requests.exceptions.ConnectionError as err:
print('Updating latest version failed: No internet,', err)
exit(1)
except requests.exceptions.HTTPError as err:
if 'duplicate' in r['message']:
exit(0)
print('Updating latest version failed: Server error,', err)
exit(1)
except requests.exceptions.Timeout as err:
@@ -43,5 +47,4 @@
print('Updating latest version failed:', err)
exit(1)

response = response.json()
print(response['message'])
exit(0)
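
The reworked script parses the JSON body before calling raise_for_status(), so the HTTPError handler can inspect the server's message and treat a duplicate-version rejection as success rather than a failure. A condensed, runnable sketch of the pattern — URL, payload, and auth are placeholders, not the script's real values:

    import requests

    def post_version(url, payload, auth):
        try:
            response = requests.post(url, json=payload, auth=auth, timeout=5)
            r = response.json()          # parse first, so handlers below can read it
            print(r)
            response.raise_for_status()  # then raise on any 4xx/5xx status
        except requests.exceptions.HTTPError:
            if 'duplicate' in r['message']:
                return 0                 # version already recorded: not a failure
            return 1
        except requests.exceptions.RequestException:
            return 1                     # connection errors, timeouts, etc.
        return 0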
22 changes: 2 additions & 20 deletions python/taichi/types/texture_type.py
@@ -1,5 +1,3 @@
import warnings

from taichi.lang.enums import Format
from taichi.lang.exception import TaichiCompilationError
from taichi.types.primitive_types import f16, f32, i8, i16, i32, u8, u16, u32
@@ -63,29 +61,13 @@ class RWTextureType:

Args:
num_dimensions (int): Number of dimensions. For example, for a 2D texture this should be `2`.
num_channels (int): Number of channels in the texture.
channel_format (DataType): Data type of texture
lod (float): Specifies the explicit level-of-detail.
fmt (ti.Format): Color format of texture
"""
def __init__(self,
num_dimensions,
num_channels=None,
channel_format=None,
lod=0,
fmt=None):
def __init__(self, num_dimensions, lod=0, fmt=None):
self.num_dimensions = num_dimensions
if fmt is None:
warnings.warn(
"Specifying num_channels and channel_format is deprecated and will be removed in v1.5.0, please specify fmt instead.",
DeprecationWarning)
if num_channels is None or channel_format is None:
raise TaichiCompilationError(
"Incomplete type info for rw_texture, please specify its fmt (ti.Format)"
)
self.num_channels = num_channels
self.channel_format = channel_format
self.fmt = TY_CH2FORMAT[(self.channel_format, self.num_channels)]
raise TaichiCompilationError("fmt is required for rw_texture type")
else:
self.channel_format, self.num_channels = FORMAT2TY_CH[fmt]
self.fmt = fmt
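With the deprecation path removed, rw_texture types must be declared with fmt, and the channel format and count are derived from it via FORMAT2TY_CH. A minimal usage sketch of the resulting API (assuming a Vulkan-capable build; the kernel body is illustrative):

    import taichi as ti

    ti.init(arch=ti.vulkan)

    @ti.kernel
    def fill(tex: ti.types.rw_texture(num_dimensions=2, fmt=ti.Format.r32f, lod=0)):
        for i, j in ti.ndrange(128, 128):
            tex.store(ti.Vector([i, j]), ti.Vector([0.5, 0.0, 0.0, 0.0]))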
17 changes: 14 additions & 3 deletions taichi/codegen/amdgpu/codegen_amdgpu.cpp
@@ -294,7 +294,7 @@ class TaskCodeGenAMDGPU : public TaskCodeGenLLVM {
} else if (stmt->task_type == Type::range_for) {
create_offload_range_for(stmt);
} else if (stmt->task_type == Type::struct_for) {
create_offload_struct_for(stmt, true);
create_offload_struct_for(stmt);
} else if (stmt->task_type == Type::mesh_for) {
create_offload_mesh_for(stmt);
} else if (stmt->task_type == Type::listgen) {
@@ -395,6 +395,18 @@ class TaskCodeGenAMDGPU : public TaskCodeGenLLVM {
}
}
}

private:
std::tuple<llvm::Value *, llvm::Value *> get_spmd_info() override {
auto thread_idx =
builder->CreateIntrinsic(Intrinsic::amdgcn_workitem_id_x, {}, {});
auto workgroup_dim_ =
call("__ockl_get_local_size",
llvm::ConstantInt::get(llvm::Type::getInt32Ty(*llvm_context), 0));
auto block_dim = builder->CreateTrunc(
workgroup_dim_, llvm::Type::getInt32Ty(*llvm_context));
return std::make_tuple(thread_idx, block_dim);
}
};

LLVMCompiledTask KernelCodeGenAMDGPU::compile_task(
@@ -407,8 +419,7 @@ LLVMCompiledTask KernelCodeGenAMDGPU::compile_task(

FunctionType KernelCodeGenAMDGPU::compile_to_function() {
auto *llvm_prog = get_llvm_program(prog);
const auto &config = get_compile_config();
auto *tlctx = llvm_prog->get_llvm_context(config.arch);
auto *tlctx = llvm_prog->get_llvm_context();

AMDGPUModuleToFunctionConverter converter{tlctx,
llvm_prog->get_runtime_executor()};
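A detail worth noting in the AMDGPU hook above: __ockl_get_local_size (called with constant 0 to select the x dimension) returns a wider integer than the 32-bit NVVM ntid intrinsic, hence the CreateTrunc down to i32 so that block_dim has the same type on every backend. This reading is inferred from the trunc in the diff itself.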
4 changes: 2 additions & 2 deletions taichi/codegen/codegen.cpp
@@ -68,7 +68,7 @@ KernelCodeGen::maybe_read_compilation_from_cache(
}

LlvmOfflineCache::KernelCacheData cache_data;
auto *tlctx = llvm_prog->get_llvm_context(compile_config_.arch);
auto *tlctx = llvm_prog->get_llvm_context();
auto &llvm_ctx = *tlctx->get_this_thread_context();

if (!reader->get_kernel_cache(cache_data, kernel_key, llvm_ctx)) {
@@ -85,7 +85,7 @@ void KernelCodeGen::cache_kernel(const std::string &kernel_key,

LLVMCompiledKernel KernelCodeGen::compile_kernel_to_module() {
auto *llvm_prog = get_llvm_program(prog);
auto *tlctx = llvm_prog->get_llvm_context(compile_config_.arch);
auto *tlctx = llvm_prog->get_llvm_context();
std::string kernel_key =
get_hashed_offline_cache_key(compile_config_, kernel);
kernel->set_kernel_key_for_cache(kernel_key);
10 changes: 8 additions & 2 deletions taichi/codegen/cpu/codegen_cpu.cpp
@@ -209,6 +209,13 @@ class TaskCodeGenCPU : public TaskCodeGenLLVM {
TI_NOT_IMPLEMENTED
}
}

private:
std::tuple<llvm::Value *, llvm::Value *> get_spmd_info() override {
auto thread_idx = tlctx->get_constant(0);
auto block_dim = tlctx->get_constant(1);
return std::make_tuple(thread_idx, block_dim);
}
};

} // namespace
@@ -274,8 +281,7 @@ LLVMCompiledTask KernelCodeGenCPU::compile_task(
FunctionType KernelCodeGenCPU::compile_to_function() {
TI_AUTO_PROF;
auto *llvm_prog = get_llvm_program(prog);
const auto &config = get_compile_config();
auto *tlctx = llvm_prog->get_llvm_context(config.arch);
auto *tlctx = llvm_prog->get_llvm_context();

CPUModuleToFunctionConverter converter(
tlctx, get_llvm_program(prog)->get_runtime_executor());
14 changes: 11 additions & 3 deletions taichi/codegen/cuda/codegen_cuda.cpp
@@ -474,7 +474,7 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
} else if (stmt->task_type == Type::range_for) {
create_offload_range_for(stmt);
} else if (stmt->task_type == Type::struct_for) {
create_offload_struct_for(stmt, true);
create_offload_struct_for(stmt);
} else if (stmt->task_type == Type::mesh_for) {
create_offload_mesh_for(stmt);
} else if (stmt->task_type == Type::listgen) {
@@ -584,6 +584,15 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
llvm_val[stmt], llvm::Type::getHalfTy(*llvm_context));
}
}

private:
std::tuple<llvm::Value *, llvm::Value *> get_spmd_info() override {
auto thread_idx =
builder->CreateIntrinsic(Intrinsic::nvvm_read_ptx_sreg_tid_x, {}, {});
auto block_dim =
builder->CreateIntrinsic(Intrinsic::nvvm_read_ptx_sreg_ntid_x, {}, {});
return std::make_tuple(thread_idx, block_dim);
}
};

LLVMCompiledTask KernelCodeGenCUDA::compile_task(
@@ -597,8 +606,7 @@ LLVMCompiledTask KernelCodeGenCUDA::compile_task(
FunctionType KernelCodeGenCUDA::compile_to_function() {
TI_AUTO_PROF
auto *llvm_prog = get_llvm_program(prog);
const auto &config = get_compile_config();
auto *tlctx = llvm_prog->get_llvm_context(config.arch);
auto *tlctx = llvm_prog->get_llvm_context();

CUDAModuleToFunctionConverter converter{tlctx,
llvm_prog->get_runtime_executor()};
7 changes: 7 additions & 0 deletions taichi/codegen/dx12/codegen_dx12.cpp
@@ -191,6 +191,13 @@ class TaskCodeGenLLVMDX12 : public TaskCodeGenLLVM {
TI_NOT_IMPLEMENTED
}
}

private:
std::tuple<llvm::Value *, llvm::Value *> get_spmd_info() override {
auto thread_idx = tlctx->get_constant(0);
auto block_dim = tlctx->get_constant(1);
return std::make_tuple(thread_idx, block_dim);
}
};

} // namespace
38 changes: 11 additions & 27 deletions taichi/codegen/llvm/codegen_llvm.cpp
@@ -308,13 +308,11 @@ TaskCodeGenLLVM::TaskCodeGenLLVM(const CompileConfig &compile_config,
IRNode *ir,
std::unique_ptr<llvm::Module> &&module)
// TODO: simplify LLVMModuleBuilder ctor input
: LLVMModuleBuilder(module == nullptr
? get_llvm_program(kernel->program)
->get_llvm_context(compile_config.arch)
->new_module("kernel")
: std::move(module),
get_llvm_program(kernel->program)
->get_llvm_context(compile_config.arch)),
: LLVMModuleBuilder(module == nullptr ? get_llvm_program(kernel->program)
->get_llvm_context()
->new_module("kernel")
: std::move(module),
get_llvm_program(kernel->program)->get_llvm_context()),
compile_config(compile_config),
kernel(kernel),
ir(ir),
@@ -2023,8 +2021,7 @@ std::tuple<llvm::Value *, llvm::Value *> TaskCodeGenLLVM::get_range_for_bounds(
return std::tuple(begin, end);
}

void TaskCodeGenLLVM::create_offload_struct_for(OffloadedStmt *stmt,
bool spmd) {
void TaskCodeGenLLVM::create_offload_struct_for(OffloadedStmt *stmt) {
using namespace llvm;
// TODO: instead of constructing tons of LLVM IR, writing the logic in
// runtime.cpp may be a cleaner solution. See
@@ -2124,18 +2121,9 @@ void TaskCodeGenLLVM::create_offload_struct_for(OffloadedStmt *stmt,
call("block_barrier"); // "__syncthreads()"
}

llvm::Value *thread_idx = nullptr, *block_dim = nullptr;

if (spmd) {
thread_idx =
builder->CreateIntrinsic(Intrinsic::nvvm_read_ptx_sreg_tid_x, {}, {});
block_dim = builder->CreateIntrinsic(Intrinsic::nvvm_read_ptx_sreg_ntid_x,
{}, {});
builder->CreateStore(builder->CreateAdd(thread_idx, lower_bound),
loop_index);
} else {
builder->CreateStore(lower_bound, loop_index);
}
auto [thread_idx, block_dim] = this->get_spmd_info();
builder->CreateStore(builder->CreateAdd(thread_idx, lower_bound),
loop_index);

auto loop_test_bb = BasicBlock::Create(*llvm_context, "loop_test", func);
auto loop_body_bb = BasicBlock::Create(*llvm_context, "loop_body", func);
@@ -2218,11 +2206,7 @@ void TaskCodeGenLLVM::create_offload_struct_for(OffloadedStmt *stmt,
// body tail: increment loop_index and jump to loop_test
builder->SetInsertPoint(body_tail_bb);

if (spmd) {
create_increment(loop_index, block_dim);
} else {
create_increment(loop_index, tlctx->get_constant(1));
}
create_increment(loop_index, block_dim);
builder->CreateBr(loop_test_bb);

builder->SetInsertPoint(func_exit);
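
The payoff of the refactor is visible in this hunk: the spmd flag is gone and the shared emitter always generates the stride form (loop_index starts at lower_bound + thread_idx and advances by block_dim), with each backend supplying that pair through get_spmd_info(). A toy, self-contained C++ model of the dispatch, using plain ints in place of llvm::Value * — all names here are illustrative, not Taichi's:

    #include <cstdio>
    #include <tuple>

    // Toy stand-in for TaskCodeGenLLVM: one shared loop emitter, with the
    // per-backend SPMD pair supplied by a virtual hook like get_spmd_info().
    struct ToyTaskCodeGen {
      virtual std::tuple<int, int> get_spmd_info() = 0;  // {thread_idx, block_dim}
      virtual ~ToyTaskCodeGen() = default;

      void run_struct_for(int lower_bound, int upper_bound) {
        auto [thread_idx, block_dim] = get_spmd_info();
        // Serial backends return {0, 1}, so this collapses to a plain loop;
        // a GPU lane starts at its thread index and strides by the block size.
        for (int i = lower_bound + thread_idx; i < upper_bound; i += block_dim)
          std::printf("%d ", i);
        std::printf("\n");
      }
    };

    struct ToyCpu : ToyTaskCodeGen {
      std::tuple<int, int> get_spmd_info() override { return {0, 1}; }
    };

    struct ToyGpuLane : ToyTaskCodeGen {  // one simulated GPU thread
      int tid, ntid;
      ToyGpuLane(int t, int n) : tid(t), ntid(n) {}
      std::tuple<int, int> get_spmd_info() override { return {tid, ntid}; }
    };

    int main() {
      ToyCpu().run_struct_for(0, 8);          // prints: 0 1 2 3 4 5 6 7
      ToyGpuLane(1, 4).run_struct_for(0, 8);  // lane 1 of 4 prints: 1 5
    }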
@@ -2551,7 +2535,7 @@ FunctionCreationGuard TaskCodeGenLLVM::get_function_creation_guard(
}

void TaskCodeGenLLVM::initialize_context() {
tlctx = get_llvm_program(prog)->get_llvm_context(compile_config.arch);
tlctx = get_llvm_program(prog)->get_llvm_context();
llvm_context = tlctx->get_this_thread_context();
builder = std::make_unique<llvm::IRBuilder<>>(*llvm_context);
}
4 changes: 3 additions & 1 deletion taichi/codegen/llvm/codegen_llvm.h
@@ -332,7 +332,7 @@ class TaskCodeGenLLVM : public IRVisitor, public LLVMModuleBuilder {
TI_NOT_IMPLEMENTED;
}

void create_offload_struct_for(OffloadedStmt *stmt, bool spmd = false);
void create_offload_struct_for(OffloadedStmt *stmt);

void visit(LoopIndexStmt *stmt) override;

@@ -410,6 +410,8 @@ class TaskCodeGenLLVM : public IRVisitor, public LLVMModuleBuilder {
const Type *current_type,
int &current_element,
std::vector<llvm::Value *> &current_index);

virtual std::tuple<llvm::Value *, llvm::Value *> get_spmd_info() = 0;
};

} // namespace taichi::lang
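Declaring get_spmd_info() pure virtual forces every concrete task codegen to state its SPMD shape explicitly: per the other hunks in this PR, CPU and DX12 return the constants {0, 1}, CUDA and AMDGPU return the hardware thread index and block size, and WASM stubs it with TI_NOT_IMPLEMENTED.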
5 changes: 5 additions & 0 deletions taichi/codegen/llvm/llvm_codegen_utils.h
@@ -10,6 +10,11 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsNVPTX.h"

#if defined(TI_WITH_AMDGPU)
#include "llvm/IR/IntrinsicsAMDGPU.h"
#endif

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
2 changes: 1 addition & 1 deletion taichi/codegen/llvm/struct_llvm.cpp
@@ -28,7 +28,7 @@ StructCompilerLLVM::StructCompilerLLVM(Arch arch,
int snode_tree_id)
: StructCompilerLLVM(arch,
*prog->config,
prog->get_llvm_context(arch),
prog->get_llvm_context(),
std::move(module),
snode_tree_id) {
}
10 changes: 7 additions & 3 deletions taichi/codegen/wasm/codegen_wasm.cpp
@@ -234,13 +234,17 @@ class TaskCodeGenWASM : public TaskCodeGenLLVM {
res.module = std::move(this->module);
return res;
}

private:
std::tuple<llvm::Value *, llvm::Value *> get_spmd_info() override {
TI_NOT_IMPLEMENTED;
}
};

FunctionType KernelCodeGenWASM::compile_to_function() {
TI_AUTO_PROF
auto linked = compile_kernel_to_module();
auto *tlctx =
get_llvm_program(prog)->get_llvm_context(get_compile_config().arch);
auto *tlctx = get_llvm_program(prog)->get_llvm_context();
tlctx->create_jit_module(std::move(linked.module));
auto kernel_symbol = tlctx->lookup_function_pointer(linked.tasks[0].name);
return [=](RuntimeContext &context) {
@@ -279,7 +283,7 @@ LLVMCompiledTask KernelCodeGenWASM::compile_task(

LLVMCompiledKernel KernelCodeGenWASM::compile_kernel_to_module() {
const auto &config = get_compile_config();
auto *tlctx = get_llvm_program(prog)->get_llvm_context(config.arch);
auto *tlctx = get_llvm_program(prog)->get_llvm_context();
irpass::ast_to_ir(config, *kernel, true);

auto res = compile_task(config);