Commit

Merge branch 'master' into rt_gfx
qiao-bo committed Jun 8, 2022
2 parents 11fea3d + 9c4fa73 commit 0344fd7
Showing 17 changed files with 415 additions and 358 deletions.
11 changes: 9 additions & 2 deletions python/taichi/examples/ggui_examples/mass_spring_3d_ggui.py
@@ -21,6 +21,7 @@
num_triangles = (n - 1) * (n - 1) * 2
indices = ti.field(int, shape=num_triangles * 3)
vertices = ti.Vector.field(3, dtype=float, shape=n * n)
+colors = ti.Vector.field(3, dtype=float, shape=n * n)

bending_springs = False

@@ -49,6 +50,11 @@ def initialize_mesh_indices():
indices[quad_id * 6 + 4] = i * n + (j + 1)
indices[quad_id * 6 + 5] = (i + 1) * n + j

+for i, j in ti.ndrange(n, n):
+    if (i // 4 + j // 4) % 2 == 0:
+        colors[i * n + j] = (0.22, 0.72, 0.52)
+    else:
+        colors[i * n + j] = (1, 0.334, 0.52)

initialize_mesh_indices()

@@ -130,13 +136,14 @@ def update_vertices():
scene.set_camera(camera)

scene.point_light(pos=(0, 1, 2), color=(1, 1, 1))
scene.ambient_light((0.5, 0.5, 0.5))
scene.mesh(vertices,
indices=indices,
-           color=(0.8, 0, 0),
+           per_vertex_color=colors,
two_sided=True)

# Draw a smaller ball to avoid visual penetration
-scene.particles(ball_center, radius=ball_radius * 0.95, color=(0.2, 0.6, 1))
+scene.particles(ball_center, radius=ball_radius * 0.95, color=(0.5, 0.42, 0.8))
canvas.scene(scene)
window.show()

17 changes: 14 additions & 3 deletions python/taichi/lang/kernel_impl.py
@@ -656,29 +656,40 @@ def func__(*args):
is_numpy = isinstance(v, np.ndarray)
is_torch = isinstance(v,
torch.Tensor) if has_torch else False

+# Element shapes are already specialized in Taichi codegen.
+# The shape information for element dims is no longer needed.
+# Therefore we strip the element shapes from the shape vector,
+# so that it only holds "real" array shapes.
+is_soa = needed.layout == Layout.SOA
+array_shape = v.shape
+element_dim = needed.element_dim
+if element_dim:
+    array_shape = v.shape[
+        element_dim:] if is_soa else v.shape[:-element_dim]
if is_numpy:
tmp = np.ascontiguousarray(v)
# Purpose: DO NOT GC |tmp|!
tmps.append(tmp)
launch_ctx.set_arg_external_array_with_shape(
actual_argument_slot, int(tmp.ctypes.data),
-    tmp.nbytes, v.shape)
+    tmp.nbytes, array_shape)
elif is_torch:
is_ndarray = False
tmp, torch_callbacks = self.get_torch_callbacks(
v, has_torch, is_ndarray)
callbacks += torch_callbacks
launch_ctx.set_arg_external_array_with_shape(
actual_argument_slot, int(tmp.data_ptr()),
-    tmp.element_size() * tmp.nelement(), v.shape)
+    tmp.element_size() * tmp.nelement(), array_shape)
else:
# For now, paddle.fluid.core.Tensor._ptr() is only available on develop branch
tmp, paddle_callbacks = self.get_paddle_callbacks(
v, has_pp)
callbacks += paddle_callbacks
launch_ctx.set_arg_external_array_with_shape(
actual_argument_slot, int(tmp._ptr()),
-    v.element_size() * v.size, v.shape)
+    v.element_size() * v.size, array_shape)

elif isinstance(needed, MatrixType):
if id(needed.dtype) in primitive_types.real_type_ids:
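The shape-stripping rule above can be modeled standalone. A minimal Python sketch, with illustrative shapes rather than the actual Taichi API:

```python
def strip_element_shape(shape, element_dim, is_soa):
    """Drop element dims from a full shape vector.

    SOA stores element dims first (element_shape|array_shape);
    AOS stores them last (array_shape|element_shape).
    """
    if not element_dim:
        return shape
    return shape[element_dim:] if is_soa else shape[:-element_dim]

# A 5x4 ndarray of 3x2 matrices:
assert strip_element_shape((3, 2, 5, 4), 2, is_soa=True) == (5, 4)   # SOA
assert strip_element_shape((5, 4, 3, 2), 2, is_soa=False) == (5, 4)  # AOS
```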
18 changes: 16 additions & 2 deletions taichi/backends/cc/codegen_cc.cpp
@@ -156,9 +156,23 @@ class CCTransformer : public IRVisitor {
std::string offset = "0";
const auto *argload = stmt->base_ptrs[0]->as<ArgLoadStmt>();
const int arg_id = argload->arg_id;
+const auto element_shape = stmt->element_shape;
+const auto layout = stmt->element_dim < 0 ? ExternalArrayLayout::kAOS
+                                          : ExternalArrayLayout::kSOA;
+const size_t element_shape_index_offset =
+    (layout == ExternalArrayLayout::kAOS)
+        ? stmt->indices.size() - element_shape.size()
+        : 0;
+size_t size_var_index = 0;
for (int i = 0; i < stmt->indices.size(); i++) {
-auto stride = fmt::format("ti_ctx->earg[{} * {} + {}]", arg_id,
-                          taichi_max_num_indices, i);
+std::string stride;
+if (i >= element_shape_index_offset &&
+    i < element_shape_index_offset + element_shape.size()) {
+  stride = fmt::format("{}", element_shape[i - element_shape_index_offset]);
+} else {
+  stride = fmt::format("ti_ctx->earg[{} * {} + {}]", arg_id,
+                       taichi_max_num_indices, size_var_index++);
+}
offset = fmt::format("({} * {} + {})", offset, stride,
stmt->indices[i]->raw_name());
}
48 changes: 14 additions & 34 deletions taichi/backends/metal/codegen_metal.cpp
@@ -472,50 +472,30 @@ class KernelCodegenImpl : public IRVisitor {
const int num_indices = stmt->indices.size();
const auto &element_shape = stmt->element_shape;
std::vector<std::string> size_exprs;
-enum ExternalArrayLayout { layout_AOS = 0, layout_SOA = 1 };
-const auto layout = stmt->element_dim <= 0 ? layout_AOS : layout_SOA;

// Args buffer arrange dimensions from outer to inner
// AoS args buffer: array_shape|element_shape
// SoA args buffer: element_shape|array_shape
//
// ti.Matrix.ndarray(3, 2, ti.f32, (5, 4), layout=ti.Layout.AOS)
// args buffer: 5, 4, 3, 2
// ti.Matrix.ndarray(3, 2, ti.f32, (5, 4), layout=ti.Layout.SOA)
// args buffer: 3, 2, 5, 4
+const auto layout = stmt->element_dim <= 0 ? ExternalArrayLayout::kAOS
+                                           : ExternalArrayLayout::kSOA;
+const int arr_shape_len = num_indices - element_shape.size();
-int index_i = 0;
-const auto add_elem_shape_exprs = [&]() {
-  for (int es : element_shape) {
-    size_exprs.push_back(std::to_string(es));
-    ++index_i;
-  }
-};
-int arr_shape_offset = 0;
-if (layout == layout_SOA) {
-  add_elem_shape_exprs();
-  // When the layout is SOA, element shape comes before array shape, so
-  // we have to skip the element shapes first.
-  // TODO: Element shape is a compile-time known information, so extra
-  // args will always only need the array shape.
-  arr_shape_offset = element_shape.size();
-}
+const size_t element_shape_index_offset =
+    (layout == ExternalArrayLayout::kAOS) ? arr_shape_len : 0;
for (int i = 0; i < arr_shape_len; i++) {
std::string var_name =
fmt::format("{}_arr_dim{}_", stmt->raw_name(), i);
emit("const int {} = {}.extra_arg({}, {});", var_name, kContextVarName,
-     arg_id, i + arr_shape_offset);
+     arg_id, i);
size_exprs.push_back(std::move(var_name));
-++index_i;
}
-if (layout == layout_AOS) {
-  add_elem_shape_exprs();
-}
-TI_ASSERT(index_i == num_indices);
+size_t size_var_index = 0;
for (int i = 0; i < num_indices; i++) {
emit("{} *= {};", linear_index_name, size_exprs[i]);
if (i >= element_shape_index_offset &&
i < element_shape_index_offset + element_shape.size()) {
emit("{} *= {};", linear_index_name,
element_shape[i - element_shape_index_offset]);
} else {
emit("{} *= {};", linear_index_name, size_exprs[size_var_index++]);
}
emit("{} += {};", linear_index_name, stmt->indices[i]->raw_name());
}
+TI_ASSERT(size_var_index == arr_shape_len);
}
emit("}}");

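The same linearization pattern recurs in the cc, OpenGL, and LLVM backends in this commit: element-shape strides are compile-time constants folded directly into the index computation, and only the array dims are read from the args buffer at runtime. A minimal Python model of that loop, with illustrative shapes (not Taichi code):

```python
def linearize(indices, array_shape, element_shape, aos=True):
    """Model of the backend loop: fold constant element-shape strides in,
    consume runtime array-shape strides in order."""
    num_indices = len(indices)
    # AOS index order: array dims then element dims; SOA: element dims first.
    elem_offset = num_indices - len(element_shape) if aos else 0
    linear, size_var_index = 0, 0
    for i, idx in enumerate(indices):
        if elem_offset <= i < elem_offset + len(element_shape):
            stride = element_shape[i - elem_offset]  # compile-time constant
        else:
            stride = array_shape[size_var_index]     # runtime extra arg
            size_var_index += 1
        linear = linear * stride + idx
    assert size_var_index == len(array_shape)
    return linear

# 5x4 array of 3x2 matrices, AOS, index (i, j, r, c):
assert linearize((1, 2, 0, 1), (5, 4), (3, 2)) == ((1 * 4 + 2) * 3 + 0) * 2 + 1
```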
64 changes: 13 additions & 51 deletions taichi/backends/opengl/codegen_opengl.cpp
@@ -485,49 +485,13 @@ class KernelGen : public IRVisitor {
const int num_indices = stmt->indices.size();
const auto &element_shape = stmt->element_shape;
std::vector<std::string> size_var_names;
-std::vector<std::string> element_shape_size_var_names;

const auto layout = stmt->element_dim <= 0 ? ExternalArrayLayout::kAOS
: ExternalArrayLayout::kSOA;

-if (element_shape.size() > 0) {
-  int elem_beg = 0;
-  int elem_end = 0;
-  if (layout == ExternalArrayLayout::kSOA) {
-    elem_beg = 0;
-    elem_end = element_shape.size();
-  } else {
-    elem_beg = num_indices - element_shape.size();
-    elem_end = num_indices;
-  }
-  for (int i = elem_beg; i < elem_end; i++) {
-    used.int32 = true;
-    std::string var_name = fmt::format("_s{}_{}{}", i, "arr", arg_id);
-    if (!loaded_args_.count(var_name)) {
-      emit("int {} = {};", var_name, element_shape[i - elem_beg]);
-      loaded_args_.insert(var_name);
-    }
-    element_shape_size_var_names.push_back(std::move(var_name));
-  }
-}
-// Args buffer arrange dimensions from outer to inner
-// AoS args buffer: array_shape|element_shape
-// SoA args buffer: element_shape|array_shape
-//
-// ti.Matrix.ndarray(3, 2, ti.f32, (5, 4), layout=ti.Layout.AOS)
-//   args buffer: 5, 4, 3, 2
-// ti.Matrix.ndarray(3, 2, ti.f32, (5, 4), layout=ti.Layout.SOA)
-//   args buffer: 3, 2, 5, 4
-int ind_beg = 0;
-int ind_end = 0;
-if (layout == ExternalArrayLayout::kSOA) {
-  ind_beg = element_shape.size();
-  ind_end = num_indices;
-} else {
-  ind_beg = 0;
-  ind_end = num_indices - element_shape.size();
-}
-for (int i = ind_beg; i < ind_end; i++) {
+const size_t element_shape_index_offset =
+    layout == ExternalArrayLayout::kAOS ? num_indices - element_shape.size()
+                                        : 0;
+for (int i = 0; i < num_indices - element_shape.size(); i++) {
used.buf_args = true;
used.int32 = true;
std::string var_name = fmt::format("_s{}_{}{}", i, "arr", arg_id);
@@ -540,22 +504,20 @@
}
size_var_names.push_back(std::move(var_name));
}
-// Arrange index stride and offsets in correct order
-if (layout == ExternalArrayLayout::kSOA) {
-  size_var_names.insert(size_var_names.begin(),
-                        element_shape_size_var_names.begin(),
-                        element_shape_size_var_names.end());
-} else {
-  size_var_names.insert(size_var_names.end(),
-                        element_shape_size_var_names.begin(),
-                        element_shape_size_var_names.end());
-}

emit("int {} = {};", linear_index_name,
num_indices == 0 ? "0" : stmt->indices[0]->short_name());

+size_t size_var_name_index = (layout == ExternalArrayLayout::kAOS) ? 1 : 0;
for (int i = 1; i < num_indices; i++) {
emit("{} *= {};", linear_index_name, size_var_names[i]);
if (i >= element_shape_index_offset &&
i < element_shape_index_offset + element_shape.size()) {
emit("{} *= {};", linear_index_name,
std::to_string(element_shape[i - element_shape_index_offset]));
} else {
emit("{} *= {};", linear_index_name,
size_var_names[size_var_name_index++]);
}
emit("{} += {};", linear_index_name, stmt->indices[i]->short_name());
}

77 changes: 30 additions & 47 deletions taichi/codegen/codegen_llvm.cpp
@@ -324,28 +324,6 @@ CodeGenLLVM::CodeGenLLVM(Kernel *kernel,
kernel_name = kernel->name + "_kernel";
}

-llvm::Value *CodeGenLLVM::cast_int(llvm::Value *input_val,
-                                   Type *from,
-                                   Type *to) {
-  if (from == to)
-    return input_val;
-  auto from_size = 0;
-  if (from->is<CustomIntType>()) {
-    from_size = data_type_size(from->cast<CustomIntType>()->get_compute_type());
-  } else {
-    from_size = data_type_size(from);
-  }
-  if (from_size < data_type_size(to)) {
-    if (is_signed(from)) {
-      return builder->CreateSExt(input_val, tlctx->get_data_type(to));
-    } else {
-      return builder->CreateZExt(input_val, tlctx->get_data_type(to));
-    }
-  } else {
-    return builder->CreateTrunc(input_val, tlctx->get_data_type(to));
-  }
-}

void CodeGenLLVM::visit(DecorationStmt *stmt) {
}

@@ -404,9 +382,8 @@ void CodeGenLLVM::visit(UnaryOpStmt *stmt) {
}
}
} else if (!is_real(from) && !is_real(to)) {
-// TODO: implement casting into custom integer type
-TI_ASSERT(!to->is<CustomIntType>());
-llvm_val[stmt] = cast_int(llvm_val[stmt->operand], from, to);
+llvm_val[stmt] = builder->CreateIntCast(llvm_val[stmt->operand],
+                                        llvm_type(to), is_signed(from));
}
} else if (stmt->op_type == UnaryOpType::cast_bits) {
TI_ASSERT(data_type_size(stmt->ret_type) ==
@@ -1541,6 +1518,22 @@ llvm::Value *CodeGenLLVM::offset_bit_ptr(llvm::Value *input_bit_ptr,
return create_bit_ptr_struct(byte_ptr_base, new_bit_offset);
}

+std::tuple<llvm::Value *, llvm::Value *> CodeGenLLVM::load_bit_pointer(
+    llvm::Value *ptr) {
+  // 1. load byte pointer
+  auto byte_ptr_in_bit_struct =
+      builder->CreateGEP(ptr, {tlctx->get_constant(0), tlctx->get_constant(0)});
+  auto byte_ptr = builder->CreateLoad(byte_ptr_in_bit_struct);
+  TI_ASSERT(byte_ptr->getType()->getPointerElementType()->isIntegerTy(8));
+
+  // 2. load bit offset
+  auto bit_offset_in_bit_struct =
+      builder->CreateGEP(ptr, {tlctx->get_constant(0), tlctx->get_constant(1)});
+  auto bit_offset = builder->CreateLoad(bit_offset_in_bit_struct);
+  TI_ASSERT(bit_offset->getType()->isIntegerTy(32));
+  return std::make_tuple(byte_ptr, bit_offset);
+}

void CodeGenLLVM::visit(SNodeLookupStmt *stmt) {
llvm::Value *parent = nullptr;
parent = llvm_val[stmt->input_snode];
@@ -1617,24 +1610,13 @@ void CodeGenLLVM::visit(ExternalPtrStmt *stmt) {
int num_indices = stmt->indices.size();
std::vector<llvm::Value *> sizes(num_indices);
const auto &element_shape = stmt->element_shape;
-enum ExternalArrayLayout { layout_AOS = 0, layout_SOA = 1 };
-const auto layout = stmt->element_dim <= 0 ? layout_AOS : layout_SOA;
-// Determine the element shape position inside the indices vector
-// TODO: change the outer layout in order to remove the element layout
-// guess work
-int element_shape_begin = -1;
-int element_shape_end = -1;
-if (element_shape.size() > 0) {
-  if (layout == layout_SOA) {
-    element_shape_begin = 0;
-    element_shape_end = element_shape.size();
-  } else {
-    element_shape_begin = num_indices - element_shape.size();
-    element_shape_end = num_indices;
-  }
-}
+const auto layout = stmt->element_dim <= 0 ? ExternalArrayLayout::kAOS
+                                           : ExternalArrayLayout::kSOA;
+const size_t element_shape_index_offset =
+    (layout == ExternalArrayLayout::kAOS) ? num_indices - element_shape.size()
+                                          : 0;

-for (int i = 0; i < num_indices; i++) {
+for (int i = 0; i < num_indices - element_shape.size(); i++) {
auto raw_arg = create_call(
"RuntimeContext_get_extra_args",
{get_context(), tlctx->get_constant(arg_id), tlctx->get_constant(i)});
Expand All @@ -1647,18 +1629,19 @@ void CodeGenLLVM::visit(ExternalPtrStmt *stmt) {
llvm::PointerType::get(tlctx->get_data_type(dt), 0));

auto linear_index = tlctx->get_constant(0);
-int element_shape_idx = 0;
+size_t size_var_index = 0;
for (int i = 0; i < num_indices; i++) {
-if (i >= element_shape_begin && i < element_shape_end) {
+if (i >= element_shape_index_offset &&
+    i < element_shape_index_offset + element_shape.size()) {
llvm::Value *size_var =
-    tlctx->get_constant(element_shape[element_shape_idx++]);
+    tlctx->get_constant(element_shape[i - element_shape_index_offset]);
linear_index = builder->CreateMul(linear_index, size_var);
} else {
-linear_index = builder->CreateMul(linear_index, sizes[i]);
+linear_index = builder->CreateMul(linear_index, sizes[size_var_index++]);
}
linear_index = builder->CreateAdd(linear_index, llvm_val[stmt->indices[i]]);
}

+TI_ASSERT(size_var_index == num_indices - element_shape.size());
llvm_val[stmt] = builder->CreateGEP(base, linear_index);
}

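The newly added load_bit_pointer unpacks the (byte pointer, bit offset) pair used to address sub-byte custom integer values. A rough Python model of what a load through such a pair computes, assuming little-endian packing (illustrative only, not the runtime's actual memory layout):

```python
def read_custom_int(buf: bytes, byte_index: int, bit_offset: int,
                    num_bits: int) -> int:
    """Read an unsigned num_bits-wide integer at (byte_index, bit_offset)."""
    # Widen to a 64-bit window, then shift and mask out the requested bits.
    window = int.from_bytes(buf[byte_index:byte_index + 8], "little")
    return (window >> bit_offset) & ((1 << num_bits) - 1)

# Two 4-bit values packed into one byte 0xBA: low nibble 0xA, high nibble 0xB.
buf = bytes([0xBA])
assert read_custom_int(buf, 0, 0, 4) == 0xA
assert read_custom_int(buf, 0, 4, 4) == 0xB
```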
4 changes: 2 additions & 2 deletions taichi/codegen/codegen_llvm.h
@@ -183,8 +183,6 @@ class CodeGenLLVM : public IRVisitor, public LLVMModuleBuilder {

void visit(RandStmt *stmt) override;

-llvm::Value *cast_int(llvm::Value *input_val, Type *from, Type *to);

virtual void emit_extra_unary(UnaryOpStmt *stmt);

void visit(DecorationStmt *stmt) override;
@@ -317,6 +315,8 @@ class CodeGenLLVM : public IRVisitor, public LLVMModuleBuilder {

llvm::Value *offset_bit_ptr(llvm::Value *input_bit_ptr, int bit_offset_delta);

+std::tuple<llvm::Value *, llvm::Value *> load_bit_pointer(llvm::Value *ptr);

void visit(SNodeLookupStmt *stmt) override;

void visit(GetChStmt *stmt) override;
