Commit

Merge branch 'master' into rt_gfx
qiao-bo committed Jun 8, 2022
2 parents 11fea3d + 9c4fa73 commit 0344fd7
Showing 17 changed files with 415 additions and 358 deletions.
11 changes: 9 additions & 2 deletions python/taichi/examples/ggui_examples/mass_spring_3d_ggui.py
@@ -21,6 +21,7 @@
num_triangles = (n - 1) * (n - 1) * 2
indices = ti.field(int, shape=num_triangles * 3)
vertices = ti.Vector.field(3, dtype=float, shape=n * n)
+colors = ti.Vector.field(3, dtype=float, shape=n * n)

bending_springs = False

@@ -49,6 +50,11 @@ def initialize_mesh_indices():
indices[quad_id * 6 + 4] = i * n + (j + 1)
indices[quad_id * 6 + 5] = (i + 1) * n + j

+for i, j in ti.ndrange(n, n):
+    if (i // 4 + j // 4) % 2 == 0:
+        colors[i * n + j] = (0.22, 0.72, 0.52)
+    else:
+        colors[i * n + j] = (1, 0.334, 0.52)

initialize_mesh_indices()

@@ -130,13 +136,14 @@ def update_vertices():
scene.set_camera(camera)

scene.point_light(pos=(0, 1, 2), color=(1, 1, 1))
scene.ambient_light((0.5, 0.5, 0.5))
scene.mesh(vertices,
indices=indices,
-           color=(0.8, 0, 0),
+           per_vertex_color=colors,
two_sided=True)

# Draw a smaller ball to avoid visual penetration
-scene.particles(ball_center, radius=ball_radius * 0.95, color=(0.2, 0.6, 1))
+scene.particles(ball_center, radius=ball_radius * 0.95, color=(0.5, 0.42, 0.8))
canvas.scene(scene)
window.show()

17 changes: 14 additions & 3 deletions python/taichi/lang/kernel_impl.py
@@ -656,29 +656,40 @@ def func__(*args):
is_numpy = isinstance(v, np.ndarray)
is_torch = isinstance(v,
torch.Tensor) if has_torch else False

+# Element shapes are already specialized in Taichi codegen.
+# The shape information for element dims is no longer needed.
+# Therefore we strip the element shapes from the shape vector,
+# so that it only holds "real" array shapes.
+is_soa = needed.layout == Layout.SOA
+array_shape = v.shape
+element_dim = needed.element_dim
+if element_dim:
+    array_shape = v.shape[
+        element_dim:] if is_soa else v.shape[:-element_dim]
if is_numpy:
tmp = np.ascontiguousarray(v)
# Purpose: DO NOT GC |tmp|!
tmps.append(tmp)
launch_ctx.set_arg_external_array_with_shape(
actual_argument_slot, int(tmp.ctypes.data),
-    tmp.nbytes, v.shape)
+    tmp.nbytes, array_shape)
elif is_torch:
is_ndarray = False
tmp, torch_callbacks = self.get_torch_callbacks(
v, has_torch, is_ndarray)
callbacks += torch_callbacks
launch_ctx.set_arg_external_array_with_shape(
actual_argument_slot, int(tmp.data_ptr()),
-    tmp.element_size() * tmp.nelement(), v.shape)
+    tmp.element_size() * tmp.nelement(), array_shape)
else:
# For now, paddle.fluid.core.Tensor._ptr() is only available on develop branch
tmp, paddle_callbacks = self.get_paddle_callbacks(
v, has_pp)
callbacks += paddle_callbacks
launch_ctx.set_arg_external_array_with_shape(
actual_argument_slot, int(tmp._ptr()),
-    v.element_size() * v.size, v.shape)
+    v.element_size() * v.size, array_shape)

elif isinstance(needed, MatrixType):
if id(needed.dtype) in primitive_types.real_type_ids:
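The shape-stripping rule above can be modeled standalone. A minimal Python sketch, with illustrative shapes rather than the actual Taichi API:

```python
def strip_element_shape(shape, element_dim, is_soa):
    """Drop element dims from a full shape vector.

    SOA stores element dims first (element_shape|array_shape);
    AOS stores them last (array_shape|element_shape).
    """
    if not element_dim:
        return shape
    return shape[element_dim:] if is_soa else shape[:-element_dim]

# A 5x4 ndarray of 3x2 matrices:
assert strip_element_shape((3, 2, 5, 4), 2, is_soa=True) == (5, 4)   # SOA
assert strip_element_shape((5, 4, 3, 2), 2, is_soa=False) == (5, 4)  # AOS
```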
18 changes: 16 additions & 2 deletions taichi/backends/cc/codegen_cc.cpp
@@ -156,9 +156,23 @@ class CCTransformer : public IRVisitor {
std::string offset = "0";
const auto *argload = stmt->base_ptrs[0]->as<ArgLoadStmt>();
const int arg_id = argload->arg_id;
+const auto element_shape = stmt->element_shape;
+const auto layout = stmt->element_dim < 0 ? ExternalArrayLayout::kAOS
+                                          : ExternalArrayLayout::kSOA;
+const size_t element_shape_index_offset =
+    (layout == ExternalArrayLayout::kAOS)
+        ? stmt->indices.size() - element_shape.size()
+        : 0;
+size_t size_var_index = 0;
for (int i = 0; i < stmt->indices.size(); i++) {
-auto stride = fmt::format("ti_ctx->earg[{} * {} + {}]", arg_id,
-                          taichi_max_num_indices, i);
+std::string stride;
+if (i >= element_shape_index_offset &&
+    i < element_shape_index_offset + element_shape.size()) {
+  stride = fmt::format("{}", element_shape[i - element_shape_index_offset]);
+} else {
+  stride = fmt::format("ti_ctx->earg[{} * {} + {}]", arg_id,
+                       taichi_max_num_indices, size_var_index++);
+}
offset = fmt::format("({} * {} + {})", offset, stride,
stmt->indices[i]->raw_name());
}
48 changes: 14 additions & 34 deletions taichi/backends/metal/codegen_metal.cpp
@@ -472,50 +472,30 @@ class KernelCodegenImpl : public IRVisitor {
const int num_indices = stmt->indices.size();
const auto &element_shape = stmt->element_shape;
std::vector<std::string> size_exprs;
-enum ExternalArrayLayout { layout_AOS = 0, layout_SOA = 1 };
-const auto layout = stmt->element_dim <= 0 ? layout_AOS : layout_SOA;

// Args buffer arrange dimensions from outer to inner
// AoS args buffer: array_shape|element_shape
// SoA args buffer: element_shape|array_shape
//
// ti.Matrix.ndarray(3, 2, ti.f32, (5, 4), layout=ti.Layout.AOS)
// args buffer: 5, 4, 3, 2
// ti.Matrix.ndarray(3, 2, ti.f32, (5, 4), layout=ti.Layout.SOA)
// args buffer: 3, 2, 5, 4
+const auto layout = stmt->element_dim <= 0 ? ExternalArrayLayout::kAOS
+                                           : ExternalArrayLayout::kSOA;
+const int arr_shape_len = num_indices - element_shape.size();
-int index_i = 0;
-const auto add_elem_shape_exprs = [&]() {
-  for (int es : element_shape) {
-    size_exprs.push_back(std::to_string(es));
-    ++index_i;
-  }
-};
-int arr_shape_offset = 0;
-if (layout == layout_SOA) {
-  add_elem_shape_exprs();
-  // When the layout is SOA, element shape comes before array shape, so
-  // we have to skip the element shapes first.
-  // TODO: Element shape is a compile-time known information, so extra
-  // args will always only need the array shape.
-  arr_shape_offset = element_shape.size();
-}
+const size_t element_shape_index_offset =
+    (layout == ExternalArrayLayout::kAOS) ? arr_shape_len : 0;
for (int i = 0; i < arr_shape_len; i++) {
std::string var_name =
fmt::format("{}_arr_dim{}_", stmt->raw_name(), i);
emit("const int {} = {}.extra_arg({}, {});", var_name, kContextVarName,
-     arg_id, i + arr_shape_offset);
+     arg_id, i);
size_exprs.push_back(std::move(var_name));
-++index_i;
}
-if (layout == layout_AOS) {
-  add_elem_shape_exprs();
-}
-TI_ASSERT(index_i == num_indices);
+size_t size_var_index = 0;
for (int i = 0; i < num_indices; i++) {
emit("{} *= {};", linear_index_name, size_exprs[i]);
if (i >= element_shape_index_offset &&
i < element_shape_index_offset + element_shape.size()) {
emit("{} *= {};", linear_index_name,
element_shape[i - element_shape_index_offset]);
} else {
emit("{} *= {};", linear_index_name, size_exprs[size_var_index++]);
}
emit("{} += {};", linear_index_name, stmt->indices[i]->raw_name());
}
+TI_ASSERT(size_var_index == arr_shape_len);
}
emit("}}");

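The same linearization pattern recurs in the cc, OpenGL, and LLVM backends in this commit: element-shape strides are compile-time constants folded directly into the index computation, and only the array dims are read from the args buffer at runtime. A minimal Python model of that loop, with illustrative shapes (not Taichi code):

```python
def linearize(indices, array_shape, element_shape, aos=True):
    """Model of the backend loop: fold constant element-shape strides in,
    consume runtime array-shape strides in order."""
    num_indices = len(indices)
    # AOS index order: array dims then element dims; SOA: element dims first.
    elem_offset = num_indices - len(element_shape) if aos else 0
    linear, size_var_index = 0, 0
    for i, idx in enumerate(indices):
        if elem_offset <= i < elem_offset + len(element_shape):
            stride = element_shape[i - elem_offset]  # compile-time constant
        else:
            stride = array_shape[size_var_index]     # runtime extra arg
            size_var_index += 1
        linear = linear * stride + idx
    assert size_var_index == len(array_shape)
    return linear

# 5x4 array of 3x2 matrices, AOS, index (i, j, r, c):
assert linearize((1, 2, 0, 1), (5, 4), (3, 2)) == ((1 * 4 + 2) * 3 + 0) * 2 + 1
```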
64 changes: 13 additions & 51 deletions taichi/backends/opengl/codegen_opengl.cpp
@@ -485,49 +485,13 @@ class KernelGen : public IRVisitor {
const int num_indices = stmt->indices.size();
const auto &element_shape = stmt->element_shape;
std::vector<std::string> size_var_names;
-std::vector<std::string> element_shape_size_var_names;

const auto layout = stmt->element_dim <= 0 ? ExternalArrayLayout::kAOS
: ExternalArrayLayout::kSOA;

-if (element_shape.size() > 0) {
-  int elem_beg = 0;
-  int elem_end = 0;
-  if (layout == ExternalArrayLayout::kSOA) {
-    elem_beg = 0;
-    elem_end = element_shape.size();
-  } else {
-    elem_beg = num_indices - element_shape.size();
-    elem_end = num_indices;
-  }
-  for (int i = elem_beg; i < elem_end; i++) {
-    used.int32 = true;
-    std::string var_name = fmt::format("_s{}_{}{}", i, "arr", arg_id);
-    if (!loaded_args_.count(var_name)) {
-      emit("int {} = {};", var_name, element_shape[i - elem_beg]);
-      loaded_args_.insert(var_name);
-    }
-    element_shape_size_var_names.push_back(std::move(var_name));
-  }
-}
-// Args buffer arrange dimensions from outer to inner
-// AoS args buffer: array_shape|element_shape
-// SoA args buffer: element_shape|array_shape
-//
-// ti.Matrix.ndarray(3, 2, ti.f32, (5, 4), layout=ti.Layout.AOS)
-//   args buffer: 5, 4, 3, 2
-// ti.Matrix.ndarray(3, 2, ti.f32, (5, 4), layout=ti.Layout.SOA)
-//   args buffer: 3, 2, 5, 4
-int ind_beg = 0;
-int ind_end = 0;
-if (layout == ExternalArrayLayout::kSOA) {
-  ind_beg = element_shape.size();
-  ind_end = num_indices;
-} else {
-  ind_beg = 0;
-  ind_end = num_indices - element_shape.size();
-}
-for (int i = ind_beg; i < ind_end; i++) {
+const size_t element_shape_index_offset =
+    layout == ExternalArrayLayout::kAOS ? num_indices - element_shape.size()
+                                        : 0;
+for (int i = 0; i < num_indices - element_shape.size(); i++) {
used.buf_args = true;
used.int32 = true;
std::string var_name = fmt::format("_s{}_{}{}", i, "arr", arg_id);
@@ -540,22 +504,20 @@
}
size_var_names.push_back(std::move(var_name));
}
-// Arrange index stride and offsets in correct order
-if (layout == ExternalArrayLayout::kSOA) {
-  size_var_names.insert(size_var_names.begin(),
-                        element_shape_size_var_names.begin(),
-                        element_shape_size_var_names.end());
-} else {
-  size_var_names.insert(size_var_names.end(),
-                        element_shape_size_var_names.begin(),
-                        element_shape_size_var_names.end());
-}

emit("int {} = {};", linear_index_name,
num_indices == 0 ? "0" : stmt->indices[0]->short_name());

+size_t size_var_name_index = (layout == ExternalArrayLayout::kAOS) ? 1 : 0;
for (int i = 1; i < num_indices; i++) {
emit("{} *= {};", linear_index_name, size_var_names[i]);
if (i >= element_shape_index_offset &&
i < element_shape_index_offset + element_shape.size()) {
emit("{} *= {};", linear_index_name,
std::to_string(element_shape[i - element_shape_index_offset]));
} else {
emit("{} *= {};", linear_index_name,
size_var_names[size_var_name_index++]);
}
emit("{} += {};", linear_index_name, stmt->indices[i]->short_name());
}

77 changes: 30 additions & 47 deletions taichi/codegen/codegen_llvm.cpp
@@ -324,28 +324,6 @@ CodeGenLLVM::CodeGenLLVM(Kernel *kernel,
kernel_name = kernel->name + "_kernel";
}

-llvm::Value *CodeGenLLVM::cast_int(llvm::Value *input_val,
-                                   Type *from,
-                                   Type *to) {
-  if (from == to)
-    return input_val;
-  auto from_size = 0;
-  if (from->is<CustomIntType>()) {
-    from_size = data_type_size(from->cast<CustomIntType>()->get_compute_type());
-  } else {
-    from_size = data_type_size(from);
-  }
-  if (from_size < data_type_size(to)) {
-    if (is_signed(from)) {
-      return builder->CreateSExt(input_val, tlctx->get_data_type(to));
-    } else {
-      return builder->CreateZExt(input_val, tlctx->get_data_type(to));
-    }
-  } else {
-    return builder->CreateTrunc(input_val, tlctx->get_data_type(to));
-  }
-}

void CodeGenLLVM::visit(DecorationStmt *stmt) {
}

@@ -404,9 +382,8 @@ void CodeGenLLVM::visit(UnaryOpStmt *stmt) {
}
}
} else if (!is_real(from) && !is_real(to)) {
-// TODO: implement casting into custom integer type
-TI_ASSERT(!to->is<CustomIntType>());
-llvm_val[stmt] = cast_int(llvm_val[stmt->operand], from, to);
+llvm_val[stmt] = builder->CreateIntCast(llvm_val[stmt->operand],
+                                        llvm_type(to), is_signed(from));
}
} else if (stmt->op_type == UnaryOpType::cast_bits) {
TI_ASSERT(data_type_size(stmt->ret_type) ==
@@ -1541,6 +1518,22 @@ llvm::Value *CodeGenLLVM::offset_bit_ptr(llvm::Value *input_bit_ptr,
return create_bit_ptr_struct(byte_ptr_base, new_bit_offset);
}

+std::tuple<llvm::Value *, llvm::Value *> CodeGenLLVM::load_bit_pointer(
+    llvm::Value *ptr) {
+  // 1. load byte pointer
+  auto byte_ptr_in_bit_struct =
+      builder->CreateGEP(ptr, {tlctx->get_constant(0), tlctx->get_constant(0)});
+  auto byte_ptr = builder->CreateLoad(byte_ptr_in_bit_struct);
+  TI_ASSERT(byte_ptr->getType()->getPointerElementType()->isIntegerTy(8));
+
+  // 2. load bit offset
+  auto bit_offset_in_bit_struct =
+      builder->CreateGEP(ptr, {tlctx->get_constant(0), tlctx->get_constant(1)});
+  auto bit_offset = builder->CreateLoad(bit_offset_in_bit_struct);
+  TI_ASSERT(bit_offset->getType()->isIntegerTy(32));
+  return std::make_tuple(byte_ptr, bit_offset);
+}

void CodeGenLLVM::visit(SNodeLookupStmt *stmt) {
llvm::Value *parent = nullptr;
parent = llvm_val[stmt->input_snode];
@@ -1617,24 +1610,13 @@ void CodeGenLLVM::visit(ExternalPtrStmt *stmt) {
int num_indices = stmt->indices.size();
std::vector<llvm::Value *> sizes(num_indices);
const auto &element_shape = stmt->element_shape;
-enum ExternalArrayLayout { layout_AOS = 0, layout_SOA = 1 };
-const auto layout = stmt->element_dim <= 0 ? layout_AOS : layout_SOA;
-// Determine the element shape position inside the indices vector
-// TODO: change the outer layout in order to remove the element layout
-// guess work
-int element_shape_begin = -1;
-int element_shape_end = -1;
-if (element_shape.size() > 0) {
-  if (layout == layout_SOA) {
-    element_shape_begin = 0;
-    element_shape_end = element_shape.size();
-  } else {
-    element_shape_begin = num_indices - element_shape.size();
-    element_shape_end = num_indices;
-  }
-}
+const auto layout = stmt->element_dim <= 0 ? ExternalArrayLayout::kAOS
+                                           : ExternalArrayLayout::kSOA;
+const size_t element_shape_index_offset =
+    (layout == ExternalArrayLayout::kAOS) ? num_indices - element_shape.size()
+                                          : 0;

-for (int i = 0; i < num_indices; i++) {
+for (int i = 0; i < num_indices - element_shape.size(); i++) {
auto raw_arg = create_call(
"RuntimeContext_get_extra_args",
{get_context(), tlctx->get_constant(arg_id), tlctx->get_constant(i)});
Expand All @@ -1647,18 +1629,19 @@ void CodeGenLLVM::visit(ExternalPtrStmt *stmt) {
llvm::PointerType::get(tlctx->get_data_type(dt), 0));

auto linear_index = tlctx->get_constant(0);
-int element_shape_idx = 0;
+size_t size_var_index = 0;
for (int i = 0; i < num_indices; i++) {
-if (i >= element_shape_begin && i < element_shape_end) {
+if (i >= element_shape_index_offset &&
+    i < element_shape_index_offset + element_shape.size()) {
llvm::Value *size_var =
-    tlctx->get_constant(element_shape[element_shape_idx++]);
+    tlctx->get_constant(element_shape[i - element_shape_index_offset]);
linear_index = builder->CreateMul(linear_index, size_var);
} else {
-linear_index = builder->CreateMul(linear_index, sizes[i]);
+linear_index = builder->CreateMul(linear_index, sizes[size_var_index++]);
}
linear_index = builder->CreateAdd(linear_index, llvm_val[stmt->indices[i]]);
}

+TI_ASSERT(size_var_index == num_indices - element_shape.size());
llvm_val[stmt] = builder->CreateGEP(base, linear_index);
}

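The newly added load_bit_pointer unpacks the (byte pointer, bit offset) pair used to address sub-byte custom integer values. A rough Python model of what a load through such a pair computes, assuming little-endian packing (illustrative only, not the runtime's actual memory layout):

```python
def read_custom_int(buf: bytes, byte_index: int, bit_offset: int,
                    num_bits: int) -> int:
    """Read an unsigned num_bits-wide integer at (byte_index, bit_offset)."""
    # Widen to a 64-bit window, then shift and mask out the requested bits.
    window = int.from_bytes(buf[byte_index:byte_index + 8], "little")
    return (window >> bit_offset) & ((1 << num_bits) - 1)

# Two 4-bit values packed into one byte 0xBA: low nibble 0xA, high nibble 0xB.
buf = bytes([0xBA])
assert read_custom_int(buf, 0, 0, 4) == 0xA
assert read_custom_int(buf, 0, 4, 4) == 0xB
```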
4 changes: 2 additions & 2 deletions taichi/codegen/codegen_llvm.h
@@ -183,8 +183,6 @@ class CodeGenLLVM : public IRVisitor, public LLVMModuleBuilder {

void visit(RandStmt *stmt) override;

-llvm::Value *cast_int(llvm::Value *input_val, Type *from, Type *to);

virtual void emit_extra_unary(UnaryOpStmt *stmt);

void visit(DecorationStmt *stmt) override;
@@ -317,6 +315,8 @@ class CodeGenLLVM : public IRVisitor, public LLVMModuleBuilder {

llvm::Value *offset_bit_ptr(llvm::Value *input_bit_ptr, int bit_offset_delta);

+std::tuple<llvm::Value *, llvm::Value *> load_bit_pointer(llvm::Value *ptr);

void visit(SNodeLookupStmt *stmt) override;

void visit(GetChStmt *stmt) override;
