Skip to content

Commit

Permalink
add matmul with pack test (PaddlePaddle#75)
Browse files Browse the repository at this point in the history
* fix bug in absolute address of element in buffer

* update packed test
  • Loading branch information
Superjomn committed Mar 9, 2020
1 parent d24d1b9 commit 525f066
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 81 deletions.
37 changes: 25 additions & 12 deletions cinn/backends/codegen_c_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -328,11 +328,11 @@ void matmul(const struct cinn_buffer_t *_A, const struct cinn_buffer_t *_B, stru
ASSERT_EQ(Trim(target_out), Trim(out));
}

TEST(CodeGenC, matmul_with_packed) {
TEST(CodeGenC, matmul_packed) {
const int M = 100;
const int K = 20;
const int N = 50;
const int bn = 4;
const int K = 200;
const int N = 500;
const int bn = 32;
Placeholder<float> A("A", {M, K});
Placeholder<float> B("B", {K, N});

Expand All @@ -348,6 +348,13 @@ TEST(CodeGenC, matmul_with_packed) {
{M, N}, [&](Expr i, Expr j) { return A(i, k) * packedB(j / bn, k, j % bn); }, "C", k);
C->Bind(C_buf);

{
poly::Iterator i_outer, i_inner, j_outer, j_inner, k_outer, k_inner;
std::tie(i_outer, i_inner, j_outer, j_inner) = C->stage()->Tile(0, 1, bn, bn);
std::tie(k_outer, k_inner) = C->stage()->Split(poly::Iterator("k"), 4);
C->stage()->Reorder({i_outer, j_outer, i_inner, j_inner, k_outer, k_inner});
}

// Code gen
auto funcs = Lower("matmul_with_packing", {A, B, packedB, C});
ASSERT_EQ(funcs.size(), 1UL);
Expand Down Expand Up @@ -380,17 +387,23 @@ void matmul_with_packing(const struct cinn_buffer_t *_A, const struct cinn_buffe
const float* B = (const float*)(cinn_buffer_get_data_const_handle(_B));
float* C = (float*)(cinn_buffer_get_data_handle(_C));
float* PackedB = (float*)(cinn_buffer_get_data_handle(_PackedB));
for (int32_t i = 0; (i <= 11); i += 1){
for (int32_t j = 0; (j <= 19); j += 1){
for (int32_t k = 0; (k <= 3); k += 1){
PackedB[(((i * 20) + (j * 4)) + k)] = B[((j * 50) + ((i * 4) + k))];
for (int32_t i = 0; (i <= 14); i += 1){
for (int32_t j = 0; (j <= 199); j += 1){
for (int32_t k = 0; (k <= 31); k += 1){
PackedB[((((i * 200) * 32) + (j * 32)) + k)] = B[((j * 500) + ((i * 32) + k))];
};
};
};
for (int32_t i = 0; (i <= 99); i += 1){
for (int32_t j = 0; (j <= 49); j += 1){
for (int32_t k = 0; (k <= 19); k += 1){
C[((i * 50) + j)] = (A[((i * 20) + k)] * PackedB[((((j / 4) * 20) + (k * 4)) + (j % 4))]);
for (int32_t i_outer = 0; (i_outer <= 3); i_outer += 1){
for (int32_t j_outer = 0; (j_outer <= 15); j_outer += 1){
for (int32_t i_inner = 0; (i_inner <= min(31, ((-32 * i_outer) + 99))); i_inner += 1){
for (int32_t j_inner = 0; (j_inner <= min(31, ((-32 * j_outer) + 499))); j_inner += 1){
for (int32_t k_outer = 0; (k_outer <= 49); k_outer += 1){
for (int32_t k_inner = 0; (k_inner <= 3); k_inner += 1){
C[((((32 * i_outer) + i_inner) * 500) + ((32 * j_outer) + j_inner))] = (A[((((32 * i_outer) + i_inner) * 200) + ((4 * k_outer) + k_inner))] * PackedB[(((((((32 * j_outer) + j_inner) / 32) * 200) * 32) + (((4 * k_outer) + k_inner) * 32)) + (((32 * j_outer) + j_inner) % 32))]);
};
};
};
};
};
};
Expand Down
4 changes: 3 additions & 1 deletion cinn/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ set(srcs
object.cc
graph_utils.cc
context.cc
axis.cc)
axis.cc
ir.cc
)

foreach(cpp ${srcs})
set(core_src
Expand Down
26 changes: 1 addition & 25 deletions cinn/ir/buffer.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "cinn/ir/buffer.h"

#include "cinn/common/common.h"
#include "cinn/common/ir.h"
#include "cinn/ir/ir_operators.h"
#include "cinn/ir/ir_visitor.h"
#include "cinn/runtime/intrinsic.h"
Expand Down Expand Up @@ -77,31 +78,6 @@ Var _Buffer_::buffer_addr() const {
return _Var_::Make(name, thetype);
}

/*
Expr Buffer::LoadExpr(const std::vector<Expr> &indice) const {
NOT_IMPLEMENTED
auto *node = operator->();
return Load::Make(Expr(*this), AbsOffset(indice));
}
Expr Buffer::StoreExpr(const std::vector<Expr> &indice, Expr value) const {
auto *node = operator->();
return Store::Make(Expr(*this), value, AbsOffset(indice));
}
*/

Expr Buffer::AbsOffset(const std::vector<Expr> &indice) const {
auto *node = operator->();
CHECK(!node->shape.empty());
CHECK_EQ(node->shape.size(), indice.size()) << "shape and indice not match";
Expr res = indice.front() * node->shape[1];
for (int i = 1; i < node->shape.size() - 1; i++) {
res = res + indice[i] * node->shape[i + 1];
}
if (node->shape.size() > 1) res = res + indice.back();
return res;
}

Expr Buffer::DestroyExpr() const {
auto *node = operator->();
return ir::Call::Make(
Expand Down
4 changes: 0 additions & 4 deletions cinn/ir/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@ class Buffer : public IrNodeRef {

const _Buffer_* operator->() const;
_Buffer_* operator->();

protected:
//! Get a 1-dimension offset given multi-dimension indices.
Expr AbsOffset(const std::vector<Expr>& indice) const;
};

class _Buffer_ : public ExprNode<_Buffer_> {
Expand Down
37 changes: 4 additions & 33 deletions cinn/lang/tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,17 @@
#include <cstring>

#include "cinn/common/common.h"
#include "cinn/common/ir.h"
#include "cinn/ir/buffer.h"
#include "cinn/ir/ir_operators.h"
#include "cinn/ir/ir_printer.h"
#include "cinn/ir/ir_visitor.h"
#include "cinn/ir/operation.h"
#include "cinn/poly/stage.h"

namespace cinn {
namespace ir {

namespace detail {

Expr ExpandTo1DIndice(const std::vector<Expr> &shape, const std::vector<Expr> &indices) {
CHECK_EQ(shape.size(), indices.size());
Expr res = indices.front() * shape[1];
for (int i = 1; i < shape.size() - 1; i++) {
res = res + indices[i] * shape[i + 1];
}
if (shape.size() > 1) res = res + indices.back();
return res;
}

Expr ExpandTo1DIndice(const std::vector<int> &shape, const std::vector<Expr> &indices) {
std::vector<Expr> shape_;
for (int v : shape) shape_.push_back(Expr(v));
return ExpandTo1DIndice(shape, indices);
}

} // namespace detail

Tensor _Tensor_::Make(const std::string &name, const std::vector<Expr> &shape, FunctionRef fn) {
CHECK(!shape.empty()) << "Tensor shape is set empty";
CHECK(!name.empty()) << "Tensor name is set empty";
Expand Down Expand Up @@ -89,7 +71,7 @@ Expr Tensor::operator()(const std::vector<Expr> &indices) const {
} else {
CHECK(node->buffer.defined()) << utils::StringFormat("Buffer for [%s] should be defined so that it can be sliced",
node->name.c_str());
return Load::Make(*this, node->AbsOffset(indices));
return Load::Make(*this, common::ExpandTo1DIndice(node->shape, indices));
}
}

Expand Down Expand Up @@ -240,7 +222,7 @@ Expr _Tensor_::tensor_store_expanded_body() {
}
}

return ir::Store::Make(Expr(Buffer(this)), final_body, detail::ExpandTo1DIndice(shape, axis_));
return ir::Store::Make(Expr(Buffer(this)), final_body, common::ExpandTo1DIndice(shape, axis_));
}

void _Tensor_::Bind(lang::Buffer &buffer) {
Expand All @@ -254,17 +236,6 @@ void _Tensor_::Bind(lang::Buffer &buffer) {
InitStage();
}

Expr _Tensor_::AbsOffset(const std::vector<Expr> &indice) const {
CHECK(!shape.empty());
CHECK_EQ(shape.size(), indice.size()) << "shape and indice not match";
Expr res = indice.front() * shape[1];
for (int i = 1; i < shape.size() - 1; i++) {
res = res + indice[i] * shape[i + 1];
}
if (shape.size() > 1) res = res + indice.back();
return res;
}

void Tensor::ExpandInlined() {
// Collect all the Calls with Tensors
// Expand all the uninlined tensor.
Expand Down
6 changes: 0 additions & 6 deletions cinn/lang/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ namespace detail {
constexpr bool LE(int a, int b) { return a <= b; }
constexpr bool GE(int a, int b) { return a >= b; }

//! Expand milti-dim indices to 1-dim index.
Expr ExpandTo1DIndice(const std::vector<int>& shape, const std::vector<Expr>& indices);
Expr ExpandTo1DIndice(const std::vector<Expr>& shape, const std::vector<Expr>& indices);

} // namespace detail

class _Tensor_;
Expand Down Expand Up @@ -166,8 +162,6 @@ class _Tensor_ : public ExprNode<_Tensor_> {

~_Tensor_();

Expr AbsOffset(const std::vector<Expr>& indice) const;

private:
//! Create the polyhedral element for analysis.
//! It is based on the shape.
Expand Down

0 comments on commit 525f066

Please sign in to comment.