Skip to content

Commit

Permalink
add broadcast and elementwise PEs (PaddlePaddle#168)
Browse files Browse the repository at this point in the history
  • Loading branch information
wenming2014 authored Aug 12, 2020
1 parent f4cfb6d commit 75dbf7d
Show file tree
Hide file tree
Showing 25 changed files with 731 additions and 61 deletions.
7 changes: 7 additions & 0 deletions cinn/backends/extern_func_jit_register.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@
#define REGISTER_EXTERN_FUNC_ONE_IN_ONE_OUT(fn__, target__, in_type__, out_type__) \
REGISTER_EXTERN_FUNC_HELPER(fn__, target__).SetRetType<out_type__>().AddInputType<in_type__>().End()

#define REGISTER_EXTERN_FUNC_TWO_IN_ONE_OUT(fn__, target__, in_type1__, in_type2__, out_type__) \
REGISTER_EXTERN_FUNC_HELPER(fn__, target__) \
.SetRetType<out_type__>() \
.AddInputType<in_type1__>() \
.AddInputType<in_type2__>() \
.End()

#define REGISTER_EXTERN_FUNC(symbol__) bool __cinn__##symbol__##__registrar()
#define USE_EXTERN_FUNC(symbol__) \
extern bool __cinn__##symbol__##__registrar(); \
Expand Down
28 changes: 15 additions & 13 deletions cinn/backends/extern_func_protos.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,23 @@ namespace cinn {
namespace backends {

ExternFunctionProtoRegistry::ExternFunctionProtoRegistry() {
{
auto* n = detail::CreateTanhProto();
Register(n->name, n);
static const std::vector<std::string> extern_funcs_fp32 = {
"exp", "erf", "sigmoid", "sqrt", "log", "log2", "log10", "floor",
"ceil", "round", "trunc", "cos", "cosh", "tan", "sin", "sinh",
"acos", "acosh", "asin", "asinh", "atan", "atanh", "isnan", "tanh",
"isfinite", "isinf", "left_shift", "right_shift", "bitwise_or", "bitwise_and", "bitwise_xor", "bitwise_not"};
static const std::vector<std::string> extern_funcs_int64 = {
"left_shift", "right_shift", "bitwise_or", "bitwise_and", "bitwise_xor", "bitwise_not"};
for (int i = 0; i < extern_funcs_fp32.size(); ++i) {
auto* proto = new FunctionProto(extern_funcs_fp32[i], {Float(32)}, Float(32));
Register(proto->name, proto);
}
{
auto* n = detail::CreateTanhVProto();
Register(n->name, n);
for (int i = 0; i < extern_funcs_int64.size(); ++i) {
auto* proto = new FunctionProto(extern_funcs_int64[i], {Int(64)}, Int(64));
Register(proto->name, proto);
}

Register("cos", new FunctionProto("cos", {Float(32)}, Float(32)));
Register("sign", new FunctionProto("sign", {Float(32)}, Float(32)));
Register("sin", new FunctionProto("sin", {Float(32)}, Float(32)));
Register("tanh", new FunctionProto("tanh", {Float(32)}, Float(32)));
Register("log", new FunctionProto("log", {Float(32)}, Float(32)));
auto* n = detail::CreateTanhVProto();
Register(n->name, n);
}

ExternFunctionProtoRegistry& ExternFunctionProtoRegistry::Global() {
Expand All @@ -27,7 +30,6 @@ ExternFunctionProtoRegistry& ExternFunctionProtoRegistry::Global() {

namespace detail {

FunctionProto* CreateTanhProto() { return new FunctionProto(extern_func__tanh, {Float(32)}, {}, Float(32)); }
FunctionProto* CreateTanhVProto() {
return new FunctionProto(
extern_func__tanh_v, {type_of<float*>()}, {type_of<float*>()}, Void(), FunctionProto::ShapeFollowNthArgument(0));
Expand Down
1 change: 0 additions & 1 deletion cinn/backends/extern_func_protos.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ class ExternFunctionProtoRegistry : public FunctionProtoRegistry {

namespace detail {

FunctionProto* CreateTanhProto();
FunctionProto* CreateTanhVProto();

} // namespace detail
Expand Down
1 change: 1 addition & 0 deletions cinn/common/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ struct Type {
bool is_int(int bits = -1) const { return type_ == type_t::Int && (bits < 0 || bits == this->bits()); }
bool is_uint(int bits = -1) const { return type_ == type_t::UInt && (bits < 0 || bits == this->bits()); }
bool is_string() const { return type_ == type_t::String; }
bool is_index_type() { return is_int() && lanes() == 1 && (bits() == 32 || bits() == 64); }
// @}

Type& set_cpp_handle(bool x = true);
Expand Down
9 changes: 8 additions & 1 deletion cinn/hlir/pe/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
set(srcs
broadcast.cc)
broadcast.cc
elementwise.cc
nn.cc
reduction.cc
schedule.cc
transform.cc
vision.cc
)

foreach(cpp ${srcs})
set(core_src
Expand Down
44 changes: 27 additions & 17 deletions cinn/hlir/pe/broadcast.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,16 @@
#include <vector>

#include "cinn/common/ir_util.h"
#include "cinn/ir/ir_operators.h"
#include "cinn/ir/node.h"
#include "cinn/lang/compute.h"

namespace cinn {
namespace hlir {
namespace pe {

using namespace cinn::ir;
using cinn::common::make_zero;
using cinn::ir::_Var_;
using cinn::ir::Max;
using cinn::ir::Tensor;
using cinn::lang::Compute;

void GetBroadcastShape(const std::vector<Expr>& shape1,
Expand Down Expand Up @@ -115,16 +114,7 @@ Tensor Broadcast(const FuncOp& op, const Tensor& a, const Tensor& b, const std::
return output;
}

/**
* @brief Compute A && B with auto-broadcasting.
*
* @param A The first tensor or Expr
* @param B The second tensor or Expr
* @param output_name The name of the output Tensor
*
* @return The result Tensor or Expr.
*/
#define HLIR_DEFINE_BC_OP(name__, compute__) \
#define HLIR_IMP_BC_PE(name__, compute__) \
Tensor name__(const Tensor& A, const Tensor& B, const std::string& output_name) { \
auto fn = [&](const Expr& a, const Expr& b) { compute__ }; \
return Broadcast(fn, A, B, output_name); \
Expand All @@ -141,10 +131,30 @@ Tensor Broadcast(const FuncOp& op, const Tensor& a, const Tensor& b, const std::
} \
Expr name__(const Expr& a, const Expr& b) { compute__ }

HLIR_DEFINE_BC_OP(Add, return a + b;);
HLIR_DEFINE_BC_OP(Substract, return a - b;);
HLIR_DEFINE_BC_OP(Multiply, return a * b;);
HLIR_DEFINE_BC_OP(Divide, return a / b;);
HLIR_IMP_BC_PE(Add, return a + b;);
HLIR_IMP_BC_PE(Substract, return a - b;);
HLIR_IMP_BC_PE(Multiply, return a * b;);
HLIR_IMP_BC_PE(Divide, return a / b;);
HLIR_IMP_BC_PE(Floor_divide, return Floor(a / b););
HLIR_IMP_BC_PE(Mod, return a % b;);
HLIR_IMP_BC_PE(Floor_mod, return a - Floor(a / b) * b;);
HLIR_IMP_BC_PE(Maximum, return Max::Make(a, b););
HLIR_IMP_BC_PE(Minimum, return Min::Make(a, b););
HLIR_IMP_BC_PE(Power, return Power::Make(a, b););
HLIR_IMP_BC_PE(LeftShift, return a << b;);
HLIR_IMP_BC_PE(RightShift, return a >> b;);
HLIR_IMP_BC_PE(LogicaAnd, return a && b;);
HLIR_IMP_BC_PE(LogicalOr, return a || b;);
HLIR_IMP_BC_PE(LogicalXOr, return a ^ b;);
HLIR_IMP_BC_PE(BitwiseAnd, return a & b;);
HLIR_IMP_BC_PE(BitwiseOr, return a | b;);
HLIR_IMP_BC_PE(BitwiseXor, return a ^ b;);
HLIR_IMP_BC_PE(Greater, return a > b;);
HLIR_IMP_BC_PE(Less, return a < b;);
HLIR_IMP_BC_PE(Equal, return EQ::Make(a, b););
HLIR_IMP_BC_PE(NotEqual, return NE::Make(a, b););
HLIR_IMP_BC_PE(GreaterEqual, return a >= b;);
HLIR_IMP_BC_PE(LessEqual, return a <= b;);

} // namespace pe
} // namespace hlir
Expand Down
65 changes: 59 additions & 6 deletions cinn/hlir/pe/broadcast.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,66 @@
namespace cinn {
namespace hlir {
namespace pe {
#define HLIR_DCL_BC_OP(name__) \
ir::Tensor name__(const ir::Tensor& A, const ir::Tensor& B, const std::string& output_name = "");
/**
* @brief Compute A && B with auto-broadcasting.
*
* @param A The first Tensor or Expr
* @param B The second Tensor or Expr
* @param output_name The name of the output Tensor
*
* @return The result Tensor or Expr.
*/
#define HLIR_DCL_BC_PE(name__) \
ir::Tensor name__(const ir::Tensor& A, const ir::Tensor& B, const std::string& output_name = "T_" #name__ "_out");

HLIR_DCL_BC_OP(Add);
HLIR_DCL_BC_OP(Substract);
HLIR_DCL_BC_OP(Multiply);
HLIR_DCL_BC_OP(Divide);
//! Compute A + B with auto-broadcasting.
HLIR_DCL_BC_PE(Add);
//! Compute A - B with auto-broadcasting.
HLIR_DCL_BC_PE(Substract);
//! Compute A * B with auto-broadcasting.
HLIR_DCL_BC_PE(Multiply);
//! Compute A / B with auto-broadcasting.
HLIR_DCL_BC_PE(Divide);
//! Compute Floor(A / B) with auto-broadcasting.
HLIR_DCL_BC_PE(Floor_divide);
//! Compute A % B with auto-broadcasting.
HLIR_DCL_BC_PE(Mod);
//! Compute A - floor_div(A, B) * B with auto-broadcasting.
HLIR_DCL_BC_PE(Floor_mod);
//! Compute Maximum(A, B) with auto-broadcasting.
HLIR_DCL_BC_PE(Maximum);
//! Compute Minimum(A, B) with auto-broadcasting.
HLIR_DCL_BC_PE(Minimum);
//! Compute Power(A, B) with auto-broadcasting.
HLIR_DCL_BC_PE(Power);
//! Compute A << B with auto-broadcasting.
HLIR_DCL_BC_PE(LeftShift);
//! Compute A >> B with auto-broadcasting.
HLIR_DCL_BC_PE(RightShift);
//! Compute A && B with auto-broadcasting.
HLIR_DCL_BC_PE(LogicaAnd);
//! Compute A || B with auto-broadcasting.
HLIR_DCL_BC_PE(LogicalOr);
//! Compute A ^ B with auto-broadcasting.
HLIR_DCL_BC_PE(LogicalXOr);
//! Compute A & B with auto-broadcasting.
HLIR_DCL_BC_PE(BitwiseAnd);
//! Compute A | B with auto-broadcasting.
HLIR_DCL_BC_PE(BitwiseOr);
//! Compute A ^ B with auto-broadcasting.
HLIR_DCL_BC_PE(BitwiseXor);
//! Compute A > B with auto-broadcasting.
HLIR_DCL_BC_PE(Greater);
//! Compute A < B with auto-broadcasting.
HLIR_DCL_BC_PE(Less);
//! Compute A == B with auto-broadcasting.
HLIR_DCL_BC_PE(Equal);
//! Compute A != B with auto-broadcasting.
HLIR_DCL_BC_PE(NotEqual);
//! Compute A >= B with auto-broadcasting.
HLIR_DCL_BC_PE(GreaterEqual);
//! Compute A <= B with auto-broadcasting.
HLIR_DCL_BC_PE(LessEqual);

} // namespace pe
} // namespace hlir
Expand Down
59 changes: 59 additions & 0 deletions cinn/hlir/pe/elementwise.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#include "cinn/hlir/pe/elementwise.h"

#include <vector>

#include "cinn/ir/ir_operators.h"
#include "cinn/lang/compute.h"

namespace cinn {
namespace hlir {
namespace pe {

using cinn::lang::Compute;
using ir::Expr;
using ir::Tensor;

// Implements one unary elementwise primitive emitter (PE).
//
// Expands to a Tensor-level function `name__` that uses lang::Compute to map
// the scalar operator of the same name over every element of `A`, producing a
// tensor with the same shape, named `output_name`.  The inner
// `name__(A(indice))` call is NOT recursive: `A(indice)` is an Expr, so
// overload resolution picks the Expr-level operator (presumably declared in
// cinn/ir/ir_operators.h, included above -- confirm), not this Tensor overload.
#define HLIR_IMP_UNARY_PE(name__)                                                                     \
  Tensor name__(const Tensor& A, const std::string& output_name) {                                    \
    return Compute(                                                                                   \
        A->shape, [&](const std::vector<Expr>& indice) { return name__(A(indice)); }, output_name);   \
  }

// Transcendental / rounding / trigonometric / classification unary PEs.
HLIR_IMP_UNARY_PE(Exp);
HLIR_IMP_UNARY_PE(Erf);
HLIR_IMP_UNARY_PE(Sqrt);
HLIR_IMP_UNARY_PE(Log);
HLIR_IMP_UNARY_PE(Log2);
HLIR_IMP_UNARY_PE(Log10);
HLIR_IMP_UNARY_PE(Floor);
HLIR_IMP_UNARY_PE(Ceil);
HLIR_IMP_UNARY_PE(Round);
HLIR_IMP_UNARY_PE(Trunc);
HLIR_IMP_UNARY_PE(Cos);
HLIR_IMP_UNARY_PE(Cosh);
HLIR_IMP_UNARY_PE(Tan);
HLIR_IMP_UNARY_PE(Sin);
HLIR_IMP_UNARY_PE(Sinh);
HLIR_IMP_UNARY_PE(Acos);
HLIR_IMP_UNARY_PE(Acosh);
HLIR_IMP_UNARY_PE(Asin);
HLIR_IMP_UNARY_PE(Asinh);
HLIR_IMP_UNARY_PE(Atan);
HLIR_IMP_UNARY_PE(Atanh);
HLIR_IMP_UNARY_PE(Isnan);
HLIR_IMP_UNARY_PE(Tanh);
HLIR_IMP_UNARY_PE(Isfinite);
HLIR_IMP_UNARY_PE(Isinf);

// Sign / identity / logical / bitwise unary PEs.
HLIR_IMP_UNARY_PE(Negative);
HLIR_IMP_UNARY_PE(Identity);
HLIR_IMP_UNARY_PE(LogicalNot);
HLIR_IMP_UNARY_PE(BitwiseNot);
HLIR_IMP_UNARY_PE(Sigmoid);
HLIR_IMP_UNARY_PE(Sign);
HLIR_IMP_UNARY_PE(Abs);
HLIR_IMP_UNARY_PE(Rsqrt);

}  // namespace pe
}  // namespace hlir
}  // namespace cinn
62 changes: 62 additions & 0 deletions cinn/hlir/pe/elementwise.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#pragma once

#include "cinn/ir/ir.h"

namespace cinn {
namespace hlir {
namespace pe {
/**
 * @brief Unary primitive emitters
 *
 * Each declared PE takes a tensor and applies the scalar operator of the
 * same name elementwise, returning a tensor of identical shape.
 *
 * @param A The input Tensor
 * @param output_name The name of the output Tensor
 *
 * @return The result Tensor.
 */
#define HLIR_DCL_UNARY_PE(name__) \
  ir::Tensor name__(const ir::Tensor& A, const std::string& output_name = "T_" #name__ "_out");

// Transcendental / rounding / trigonometric / classification unary PEs.
HLIR_DCL_UNARY_PE(Exp);
HLIR_DCL_UNARY_PE(Erf);
HLIR_DCL_UNARY_PE(Sqrt);
HLIR_DCL_UNARY_PE(Log);
HLIR_DCL_UNARY_PE(Log2);
HLIR_DCL_UNARY_PE(Log10);
HLIR_DCL_UNARY_PE(Floor);
HLIR_DCL_UNARY_PE(Ceil);
HLIR_DCL_UNARY_PE(Round);
HLIR_DCL_UNARY_PE(Trunc);
HLIR_DCL_UNARY_PE(Cos);
HLIR_DCL_UNARY_PE(Cosh);
HLIR_DCL_UNARY_PE(Tan);
HLIR_DCL_UNARY_PE(Sin);
HLIR_DCL_UNARY_PE(Sinh);
HLIR_DCL_UNARY_PE(Acos);
HLIR_DCL_UNARY_PE(Acosh);
HLIR_DCL_UNARY_PE(Asin);
HLIR_DCL_UNARY_PE(Asinh);
HLIR_DCL_UNARY_PE(Atan);
HLIR_DCL_UNARY_PE(Atanh);
HLIR_DCL_UNARY_PE(Isnan);
HLIR_DCL_UNARY_PE(Tanh);
HLIR_DCL_UNARY_PE(Isfinite);
HLIR_DCL_UNARY_PE(Isinf);

// Sign / identity / logical / bitwise unary PEs.
HLIR_DCL_UNARY_PE(Negative);
HLIR_DCL_UNARY_PE(Identity);
HLIR_DCL_UNARY_PE(LogicalNot);
HLIR_DCL_UNARY_PE(BitwiseNot);
HLIR_DCL_UNARY_PE(Sigmoid);
HLIR_DCL_UNARY_PE(Sign);
HLIR_DCL_UNARY_PE(Abs);
HLIR_DCL_UNARY_PE(Rsqrt);
// NOTE(review): the six PEs below are declared with the one-input unary
// signature but have no matching HLIR_IMP_UNARY_PE definitions in
// elementwise.cc, and several look like they need extra parameters
// (Cast/Reinterpret: a target type; Clip: min/max bounds; Full/FullLike:
// a fill value) -- confirm the intended signatures before calling these.
HLIR_DCL_UNARY_PE(Cast);
HLIR_DCL_UNARY_PE(Clip);
HLIR_DCL_UNARY_PE(Reinterpret);
HLIR_DCL_UNARY_PE(ElementwiseSum);
HLIR_DCL_UNARY_PE(Full);
HLIR_DCL_UNARY_PE(FullLike);

}  // namespace pe
}  // namespace hlir
}  // namespace cinn
25 changes: 25 additions & 0 deletions cinn/hlir/pe/nn.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#include "cinn/hlir/pe/nn.h"

#include <vector>

// Provides the scalar (Expr-level) operator overloads that the lambdas below
// resolve to, matching the include set used by the sibling elementwise.cc.
#include "cinn/ir/ir_operators.h"
#include "cinn/lang/compute.h"

namespace cinn {
namespace hlir {
namespace pe {

using cinn::lang::Compute;
using ir::Expr;
using ir::Tensor;

/**
 * @brief Rectified linear unit PE: applies the scalar Relu with `threshold`
 * to every element of `A`.
 *
 * @param A The input Tensor.
 * @param threshold Activation threshold passed through to the scalar Relu.
 * @param output_name The name of the output Tensor.
 *
 * @return A Tensor with the same shape as `A`.
 */
template <typename T>
Tensor Relu(const Tensor& A, T threshold, const std::string& output_name) {
  // `Relu(A(indice), threshold)` resolves to the Expr-level overload
  // (presumably from ir_operators.h -- confirm), not recursively to this PE.
  return Compute(
      A->shape, [&](const std::vector<Expr>& indice) { return Relu(A(indice), threshold); }, output_name);
}

// Relu<T> is a function template defined only in this translation unit, so
// callers in other TUs compile against the declaration in nn.h but would fail
// to link without explicit instantiations.  Instantiate the arithmetic types
// callers are expected to use.
template Tensor Relu<float>(const Tensor& A, float threshold, const std::string& output_name);
template Tensor Relu<double>(const Tensor& A, double threshold, const std::string& output_name);

/**
 * @brief Leaky rectified linear unit PE: applies the scalar LeakyRelu with
 * slope `alpha` to every element of `A`.
 *
 * @param A The input Tensor.
 * @param alpha Slope for negative inputs, forwarded to the scalar LeakyRelu.
 * @param output_name The name of the output Tensor.
 *
 * @return A Tensor with the same shape as `A`.
 */
Tensor LeakyRelu(const Tensor& A, double alpha, const std::string& output_name) {
  return Compute(
      A->shape, [&](const std::vector<Expr>& indice) { return LeakyRelu(A(indice), alpha); }, output_name);
}

}  // namespace pe
}  // namespace hlir
}  // namespace cinn
16 changes: 16 additions & 0 deletions cinn/hlir/pe/nn.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

// NOTE(review): this header uses std::string but includes only ir.h; it relies
// on ir.h transitively providing <string> -- confirm.
#include "cinn/ir/ir.h"

namespace cinn {
namespace hlir {
namespace pe {

/**
 * @brief Rectified linear unit PE applied elementwise over `A`.
 *
 * @param A The input Tensor.
 * @param threshold Activation threshold (defaults to zero of type T).
 * @param output_name The name of the output Tensor.
 *
 * @return A Tensor with the same shape as `A`.
 *
 * NOTE(review): this template's definition lives in nn.cc, so only the types
 * explicitly instantiated there can be linked from other translation units --
 * confirm against nn.cc before using a new T.
 */
template <typename T>
ir::Tensor Relu(const ir::Tensor& A, T threshold = static_cast<T>(0), const std::string& output_name = "T_Relu_out");

/**
 * @brief Leaky rectified linear unit PE applied elementwise over `A`.
 *
 * @param A The input Tensor.
 * @param alpha Slope used for negative inputs (defaults to 0.1).
 * @param output_name The name of the output Tensor.
 *
 * @return A Tensor with the same shape as `A`.
 */
ir::Tensor LeakyRelu(const ir::Tensor& A, double alpha = 0.1, const std::string& output_name = "T_LeakyRelu_out");

}  // namespace pe
}  // namespace hlir
}  // namespace cinn
Loading

0 comments on commit 75dbf7d

Please sign in to comment.