Skip to content

Commit

Permalink
add broadcast and elementwise PEs (PaddlePaddle#168)
Browse files Browse the repository at this point in the history
  • Loading branch information
wenming2014 authored Aug 12, 2020
1 parent f4cfb6d commit 75dbf7d
Show file tree
Hide file tree
Showing 25 changed files with 731 additions and 61 deletions.
7 changes: 7 additions & 0 deletions cinn/backends/extern_func_jit_register.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@
#define REGISTER_EXTERN_FUNC_ONE_IN_ONE_OUT(fn__, target__, in_type__, out_type__) \
REGISTER_EXTERN_FUNC_HELPER(fn__, target__).SetRetType<out_type__>().AddInputType<in_type__>().End()

#define REGISTER_EXTERN_FUNC_TWO_IN_ONE_OUT(fn__, target__, in_type1__, in_type2__, out_type__) \
REGISTER_EXTERN_FUNC_HELPER(fn__, target__) \
.SetRetType<out_type__>() \
.AddInputType<in_type1__>() \
.AddInputType<in_type2__>() \
.End()

#define REGISTER_EXTERN_FUNC(symbol__) bool __cinn__##symbol__##__registrar()
#define USE_EXTERN_FUNC(symbol__) \
extern bool __cinn__##symbol__##__registrar(); \
Expand Down
28 changes: 15 additions & 13 deletions cinn/backends/extern_func_protos.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,23 @@ namespace cinn {
namespace backends {

ExternFunctionProtoRegistry::ExternFunctionProtoRegistry() {
{
auto* n = detail::CreateTanhProto();
Register(n->name, n);
static const std::vector<std::string> extern_funcs_fp32 = {
"exp", "erf", "sigmoid", "sqrt", "log", "log2", "log10", "floor",
"ceil", "round", "trunc", "cos", "cosh", "tan", "sin", "sinh",
"acos", "acosh", "asin", "asinh", "atan", "atanh", "isnan", "tanh",
"isfinite", "isinf", "left_shift", "right_shift", "bitwise_or", "bitwise_and", "bitwise_xor", "bitwise_not"};
static const std::vector<std::string> extern_funcs_int64 = {
"left_shift", "right_shift", "bitwise_or", "bitwise_and", "bitwise_xor", "bitwise_not"};
for (int i = 0; i < extern_funcs_fp32.size(); ++i) {
auto* proto = new FunctionProto(extern_funcs_fp32[i], {Float(32)}, Float(32));
Register(proto->name, proto);
}
{
auto* n = detail::CreateTanhVProto();
Register(n->name, n);
for (int i = 0; i < extern_funcs_int64.size(); ++i) {
auto* proto = new FunctionProto(extern_funcs_int64[i], {Int(64)}, Int(64));
Register(proto->name, proto);
}

Register("cos", new FunctionProto("cos", {Float(32)}, Float(32)));
Register("sign", new FunctionProto("sign", {Float(32)}, Float(32)));
Register("sin", new FunctionProto("sin", {Float(32)}, Float(32)));
Register("tanh", new FunctionProto("tanh", {Float(32)}, Float(32)));
Register("log", new FunctionProto("log", {Float(32)}, Float(32)));
auto* n = detail::CreateTanhVProto();
Register(n->name, n);
}

ExternFunctionProtoRegistry& ExternFunctionProtoRegistry::Global() {
Expand All @@ -27,7 +30,6 @@ ExternFunctionProtoRegistry& ExternFunctionProtoRegistry::Global() {

namespace detail {

FunctionProto* CreateTanhProto() { return new FunctionProto(extern_func__tanh, {Float(32)}, {}, Float(32)); }
FunctionProto* CreateTanhVProto() {
return new FunctionProto(
extern_func__tanh_v, {type_of<float*>()}, {type_of<float*>()}, Void(), FunctionProto::ShapeFollowNthArgument(0));
Expand Down
1 change: 0 additions & 1 deletion cinn/backends/extern_func_protos.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ class ExternFunctionProtoRegistry : public FunctionProtoRegistry {

namespace detail {

FunctionProto* CreateTanhProto();
FunctionProto* CreateTanhVProto();

} // namespace detail
Expand Down
1 change: 1 addition & 0 deletions cinn/common/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ struct Type {
bool is_int(int bits = -1) const { return type_ == type_t::Int && (bits < 0 || bits == this->bits()); }
bool is_uint(int bits = -1) const { return type_ == type_t::UInt && (bits < 0 || bits == this->bits()); }
bool is_string() const { return type_ == type_t::String; }
bool is_index_type() { return is_int() && lanes() == 1 && (bits() == 32 || bits() == 64); }
// @}

Type& set_cpp_handle(bool x = true);
Expand Down
9 changes: 8 additions & 1 deletion cinn/hlir/pe/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
set(srcs
broadcast.cc)
broadcast.cc
elementwise.cc
nn.cc
reduction.cc
schedule.cc
transform.cc
vision.cc
)

foreach(cpp ${srcs})
set(core_src
Expand Down
44 changes: 27 additions & 17 deletions cinn/hlir/pe/broadcast.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,16 @@
#include <vector>

#include "cinn/common/ir_util.h"
#include "cinn/ir/ir_operators.h"
#include "cinn/ir/node.h"
#include "cinn/lang/compute.h"

namespace cinn {
namespace hlir {
namespace pe {

using namespace cinn::ir;
using cinn::common::make_zero;
using cinn::ir::_Var_;
using cinn::ir::Max;
using cinn::ir::Tensor;
using cinn::lang::Compute;

void GetBroadcastShape(const std::vector<Expr>& shape1,
Expand Down Expand Up @@ -115,16 +114,7 @@ Tensor Broadcast(const FuncOp& op, const Tensor& a, const Tensor& b, const std::
return output;
}

/**
* @brief Compute A && B with auto-broadcasting.
*
* @param A The first tensor or Expr
* @param B The second tensor or Expr
* @param output_name The name of the output Tensor
*
* @return The result Tensor or Expr.
*/
#define HLIR_DEFINE_BC_OP(name__, compute__) \
#define HLIR_IMP_BC_PE(name__, compute__) \
Tensor name__(const Tensor& A, const Tensor& B, const std::string& output_name) { \
auto fn = [&](const Expr& a, const Expr& b) { compute__ }; \
return Broadcast(fn, A, B, output_name); \
Expand All @@ -141,10 +131,30 @@ Tensor Broadcast(const FuncOp& op, const Tensor& a, const Tensor& b, const std::
} \
Expr name__(const Expr& a, const Expr& b) { compute__ }

HLIR_DEFINE_BC_OP(Add, return a + b;);
HLIR_DEFINE_BC_OP(Substract, return a - b;);
HLIR_DEFINE_BC_OP(Multiply, return a * b;);
HLIR_DEFINE_BC_OP(Divide, return a / b;);
HLIR_IMP_BC_PE(Add, return a + b;);
HLIR_IMP_BC_PE(Substract, return a - b;);
HLIR_IMP_BC_PE(Multiply, return a * b;);
HLIR_IMP_BC_PE(Divide, return a / b;);
HLIR_IMP_BC_PE(Floor_divide, return Floor(a / b););
HLIR_IMP_BC_PE(Mod, return a % b;);
HLIR_IMP_BC_PE(Floor_mod, return a - Floor(a / b) * b;);
HLIR_IMP_BC_PE(Maximum, return Max::Make(a, b););
HLIR_IMP_BC_PE(Minimum, return Min::Make(a, b););
HLIR_IMP_BC_PE(Power, return Power::Make(a, b););
HLIR_IMP_BC_PE(LeftShift, return a << b;);
HLIR_IMP_BC_PE(RightShift, return a >> b;);
HLIR_IMP_BC_PE(LogicaAnd, return a && b;);
HLIR_IMP_BC_PE(LogicalOr, return a || b;);
HLIR_IMP_BC_PE(LogicalXOr, return a ^ b;);
HLIR_IMP_BC_PE(BitwiseAnd, return a & b;);
HLIR_IMP_BC_PE(BitwiseOr, return a | b;);
HLIR_IMP_BC_PE(BitwiseXor, return a ^ b;);
HLIR_IMP_BC_PE(Greater, return a > b;);
HLIR_IMP_BC_PE(Less, return a < b;);
HLIR_IMP_BC_PE(Equal, return EQ::Make(a, b););
HLIR_IMP_BC_PE(NotEqual, return NE::Make(a, b););
HLIR_IMP_BC_PE(GreaterEqual, return a >= b;);
HLIR_IMP_BC_PE(LessEqual, return a <= b;);

} // namespace pe
} // namespace hlir
Expand Down
65 changes: 59 additions & 6 deletions cinn/hlir/pe/broadcast.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,66 @@
namespace cinn {
namespace hlir {
namespace pe {
#define HLIR_DCL_BC_OP(name__) \
ir::Tensor name__(const ir::Tensor& A, const ir::Tensor& B, const std::string& output_name = "");
/**
* @brief Compute A && B with auto-broadcasting.
*
* @param A The first Tensor or Expr
* @param B The second Tensor or Expr
* @param output_name The name of the output Tensor
*
* @return The result Tensor or Expr.
*/
#define HLIR_DCL_BC_PE(name__) \
ir::Tensor name__(const ir::Tensor& A, const ir::Tensor& B, const std::string& output_name = "T_" #name__ "_out");

HLIR_DCL_BC_OP(Add);
HLIR_DCL_BC_OP(Substract);
HLIR_DCL_BC_OP(Multiply);
HLIR_DCL_BC_OP(Divide);
//! Compute A + B with auto-broadcasting.
HLIR_DCL_BC_PE(Add);
//! Compute A - B with auto-broadcasting.
HLIR_DCL_BC_PE(Substract);
//! Compute A * B with auto-broadcasting.
HLIR_DCL_BC_PE(Multiply);
//! Compute A / B with auto-broadcasting.
HLIR_DCL_BC_PE(Divide);
//! Compute Floor(A / B) with auto-broadcasting.
HLIR_DCL_BC_PE(Floor_divide);
//! Compute A % B with auto-broadcasting.
HLIR_DCL_BC_PE(Mod);
//! Compute A - floor_div(A, B) * B with auto-broadcasting.
HLIR_DCL_BC_PE(Floor_mod);
//! Compute Maximum(A, B) with auto-broadcasting.
HLIR_DCL_BC_PE(Maximum);
//! Compute Minimum(A, B) with auto-broadcasting.
HLIR_DCL_BC_PE(Minimum);
//! Compute Power(A, B) with auto-broadcasting.
HLIR_DCL_BC_PE(Power);
//! Compute A << B with auto-broadcasting.
HLIR_DCL_BC_PE(LeftShift);
//! Compute A >> B with auto-broadcasting.
HLIR_DCL_BC_PE(RightShift);
//! Compute A && B with auto-broadcasting.
HLIR_DCL_BC_PE(LogicaAnd);
//! Compute A || B with auto-broadcasting.
HLIR_DCL_BC_PE(LogicalOr);
//! Compute A ^ B with auto-broadcasting.
HLIR_DCL_BC_PE(LogicalXOr);
//! Compute A & B with auto-broadcasting.
HLIR_DCL_BC_PE(BitwiseAnd);
//! Compute A | B with auto-broadcasting.
HLIR_DCL_BC_PE(BitwiseOr);
//! Compute A ^ B with auto-broadcasting.
HLIR_DCL_BC_PE(BitwiseXor);
//! Compute A > B with auto-broadcasting.
HLIR_DCL_BC_PE(Greater);
//! Compute A < B with auto-broadcasting.
HLIR_DCL_BC_PE(Less);
//! Compute A == B with auto-broadcasting.
HLIR_DCL_BC_PE(Equal);
//! Compute A != B with auto-broadcasting.
HLIR_DCL_BC_PE(NotEqual);
//! Compute A >= B with auto-broadcasting.
HLIR_DCL_BC_PE(GreaterEqual);
//! Compute A <= B with auto-broadcasting.
HLIR_DCL_BC_PE(LessEqual);

} // namespace pe
} // namespace hlir
Expand Down
59 changes: 59 additions & 0 deletions cinn/hlir/pe/elementwise.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#include "cinn/hlir/pe/elementwise.h"

#include <vector>

#include "cinn/ir/ir_operators.h"
#include "cinn/lang/compute.h"

namespace cinn {
namespace hlir {
namespace pe {

using cinn::lang::Compute;
using ir::Expr;
using ir::Tensor;

// Implements one unary elementwise primitive emitter (PE).
//
// Expands to a Tensor-level function `name__` that uses lang::Compute to map
// the scalar operator of the same name over every element of `A`, producing a
// tensor with the same shape, named `output_name`.  The inner
// `name__(A(indice))` call is NOT recursive: `A(indice)` is an Expr, so
// overload resolution picks the Expr-level operator (presumably declared in
// cinn/ir/ir_operators.h, included above -- confirm), not this Tensor overload.
#define HLIR_IMP_UNARY_PE(name__)                                                                     \
  Tensor name__(const Tensor& A, const std::string& output_name) {                                    \
    return Compute(                                                                                   \
        A->shape, [&](const std::vector<Expr>& indice) { return name__(A(indice)); }, output_name);   \
  }

// Transcendental / rounding / trigonometric / classification unary PEs.
HLIR_IMP_UNARY_PE(Exp);
HLIR_IMP_UNARY_PE(Erf);
HLIR_IMP_UNARY_PE(Sqrt);
HLIR_IMP_UNARY_PE(Log);
HLIR_IMP_UNARY_PE(Log2);
HLIR_IMP_UNARY_PE(Log10);
HLIR_IMP_UNARY_PE(Floor);
HLIR_IMP_UNARY_PE(Ceil);
HLIR_IMP_UNARY_PE(Round);
HLIR_IMP_UNARY_PE(Trunc);
HLIR_IMP_UNARY_PE(Cos);
HLIR_IMP_UNARY_PE(Cosh);
HLIR_IMP_UNARY_PE(Tan);
HLIR_IMP_UNARY_PE(Sin);
HLIR_IMP_UNARY_PE(Sinh);
HLIR_IMP_UNARY_PE(Acos);
HLIR_IMP_UNARY_PE(Acosh);
HLIR_IMP_UNARY_PE(Asin);
HLIR_IMP_UNARY_PE(Asinh);
HLIR_IMP_UNARY_PE(Atan);
HLIR_IMP_UNARY_PE(Atanh);
HLIR_IMP_UNARY_PE(Isnan);
HLIR_IMP_UNARY_PE(Tanh);
HLIR_IMP_UNARY_PE(Isfinite);
HLIR_IMP_UNARY_PE(Isinf);

// Sign / identity / logical / bitwise unary PEs.
HLIR_IMP_UNARY_PE(Negative);
HLIR_IMP_UNARY_PE(Identity);
HLIR_IMP_UNARY_PE(LogicalNot);
HLIR_IMP_UNARY_PE(BitwiseNot);
HLIR_IMP_UNARY_PE(Sigmoid);
HLIR_IMP_UNARY_PE(Sign);
HLIR_IMP_UNARY_PE(Abs);
HLIR_IMP_UNARY_PE(Rsqrt);

}  // namespace pe
}  // namespace hlir
}  // namespace cinn
62 changes: 62 additions & 0 deletions cinn/hlir/pe/elementwise.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#pragma once

#include "cinn/ir/ir.h"

namespace cinn {
namespace hlir {
namespace pe {
/**
 * @brief Unary primitive emitters
 *
 * Each declared PE takes a tensor and applies the scalar operator of the
 * same name elementwise, returning a tensor of identical shape.
 *
 * @param A The input Tensor
 * @param output_name The name of the output Tensor
 *
 * @return The result Tensor.
 */
#define HLIR_DCL_UNARY_PE(name__) \
  ir::Tensor name__(const ir::Tensor& A, const std::string& output_name = "T_" #name__ "_out");

// Transcendental / rounding / trigonometric / classification unary PEs.
HLIR_DCL_UNARY_PE(Exp);
HLIR_DCL_UNARY_PE(Erf);
HLIR_DCL_UNARY_PE(Sqrt);
HLIR_DCL_UNARY_PE(Log);
HLIR_DCL_UNARY_PE(Log2);
HLIR_DCL_UNARY_PE(Log10);
HLIR_DCL_UNARY_PE(Floor);
HLIR_DCL_UNARY_PE(Ceil);
HLIR_DCL_UNARY_PE(Round);
HLIR_DCL_UNARY_PE(Trunc);
HLIR_DCL_UNARY_PE(Cos);
HLIR_DCL_UNARY_PE(Cosh);
HLIR_DCL_UNARY_PE(Tan);
HLIR_DCL_UNARY_PE(Sin);
HLIR_DCL_UNARY_PE(Sinh);
HLIR_DCL_UNARY_PE(Acos);
HLIR_DCL_UNARY_PE(Acosh);
HLIR_DCL_UNARY_PE(Asin);
HLIR_DCL_UNARY_PE(Asinh);
HLIR_DCL_UNARY_PE(Atan);
HLIR_DCL_UNARY_PE(Atanh);
HLIR_DCL_UNARY_PE(Isnan);
HLIR_DCL_UNARY_PE(Tanh);
HLIR_DCL_UNARY_PE(Isfinite);
HLIR_DCL_UNARY_PE(Isinf);

// Sign / identity / logical / bitwise unary PEs.
HLIR_DCL_UNARY_PE(Negative);
HLIR_DCL_UNARY_PE(Identity);
HLIR_DCL_UNARY_PE(LogicalNot);
HLIR_DCL_UNARY_PE(BitwiseNot);
HLIR_DCL_UNARY_PE(Sigmoid);
HLIR_DCL_UNARY_PE(Sign);
HLIR_DCL_UNARY_PE(Abs);
HLIR_DCL_UNARY_PE(Rsqrt);
// NOTE(review): the six PEs below are declared with the one-input unary
// signature but have no matching HLIR_IMP_UNARY_PE definitions in
// elementwise.cc, and several look like they need extra parameters
// (Cast/Reinterpret: a target type; Clip: min/max bounds; Full/FullLike:
// a fill value) -- confirm the intended signatures before calling these.
HLIR_DCL_UNARY_PE(Cast);
HLIR_DCL_UNARY_PE(Clip);
HLIR_DCL_UNARY_PE(Reinterpret);
HLIR_DCL_UNARY_PE(ElementwiseSum);
HLIR_DCL_UNARY_PE(Full);
HLIR_DCL_UNARY_PE(FullLike);

}  // namespace pe
}  // namespace hlir
}  // namespace cinn
25 changes: 25 additions & 0 deletions cinn/hlir/pe/nn.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#include "cinn/hlir/pe/nn.h"

#include <vector>

// Provides the scalar (Expr-level) operator overloads that the lambdas below
// resolve to, matching the include set used by the sibling elementwise.cc.
#include "cinn/ir/ir_operators.h"
#include "cinn/lang/compute.h"

namespace cinn {
namespace hlir {
namespace pe {

using cinn::lang::Compute;
using ir::Expr;
using ir::Tensor;

/**
 * @brief Rectified linear unit PE: applies the scalar Relu with `threshold`
 * to every element of `A`.
 *
 * @param A The input Tensor.
 * @param threshold Activation threshold passed through to the scalar Relu.
 * @param output_name The name of the output Tensor.
 *
 * @return A Tensor with the same shape as `A`.
 */
template <typename T>
Tensor Relu(const Tensor& A, T threshold, const std::string& output_name) {
  // `Relu(A(indice), threshold)` resolves to the Expr-level overload
  // (presumably from ir_operators.h -- confirm), not recursively to this PE.
  return Compute(
      A->shape, [&](const std::vector<Expr>& indice) { return Relu(A(indice), threshold); }, output_name);
}

// Relu<T> is a function template defined only in this translation unit, so
// callers in other TUs compile against the declaration in nn.h but would fail
// to link without explicit instantiations.  Instantiate the arithmetic types
// callers are expected to use.
template Tensor Relu<float>(const Tensor& A, float threshold, const std::string& output_name);
template Tensor Relu<double>(const Tensor& A, double threshold, const std::string& output_name);

/**
 * @brief Leaky rectified linear unit PE: applies the scalar LeakyRelu with
 * slope `alpha` to every element of `A`.
 *
 * @param A The input Tensor.
 * @param alpha Slope for negative inputs, forwarded to the scalar LeakyRelu.
 * @param output_name The name of the output Tensor.
 *
 * @return A Tensor with the same shape as `A`.
 */
Tensor LeakyRelu(const Tensor& A, double alpha, const std::string& output_name) {
  return Compute(
      A->shape, [&](const std::vector<Expr>& indice) { return LeakyRelu(A(indice), alpha); }, output_name);
}

}  // namespace pe
}  // namespace hlir
}  // namespace cinn
16 changes: 16 additions & 0 deletions cinn/hlir/pe/nn.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

// NOTE(review): this header uses std::string but includes only ir.h; it relies
// on ir.h transitively providing <string> -- confirm.
#include "cinn/ir/ir.h"

namespace cinn {
namespace hlir {
namespace pe {

/**
 * @brief Rectified linear unit PE applied elementwise over `A`.
 *
 * @param A The input Tensor.
 * @param threshold Activation threshold (defaults to zero of type T).
 * @param output_name The name of the output Tensor.
 *
 * @return A Tensor with the same shape as `A`.
 *
 * NOTE(review): this template's definition lives in nn.cc, so only the types
 * explicitly instantiated there can be linked from other translation units --
 * confirm against nn.cc before using a new T.
 */
template <typename T>
ir::Tensor Relu(const ir::Tensor& A, T threshold = static_cast<T>(0), const std::string& output_name = "T_Relu_out");

/**
 * @brief Leaky rectified linear unit PE applied elementwise over `A`.
 *
 * @param A The input Tensor.
 * @param alpha Slope used for negative inputs (defaults to 0.1).
 * @param output_name The name of the output Tensor.
 *
 * @return A Tensor with the same shape as `A`.
 */
ir::Tensor LeakyRelu(const ir::Tensor& A, double alpha = 0.1, const std::string& output_name = "T_LeakyRelu_out");

}  // namespace pe
}  // namespace hlir
}  // namespace cinn
Loading

0 comments on commit 75dbf7d

Please sign in to comment.