[ETHOSN] Support conversion of add to depthwise #12531

Merged 1 commit on Aug 25, 2022
43 changes: 34 additions & 9 deletions python/tvm/relay/op/contrib/ethosn.py
@@ -215,6 +215,24 @@ def qnn_mul_pattern():
input_is_right = gen_mul_inputs(is_constant(), wildcard())
return input_is_left | input_is_right

def qnn_add_pattern():
add_op = is_op("qnn.add")
gen_add_inputs = lambda x, y: add_op(
x,
y,
is_constant(),
is_constant(),
is_constant(),
is_constant(),
is_constant(),
is_constant(),
)
two_inputs = gen_add_inputs(wildcard(), wildcard())
input_is_left = gen_add_inputs(wildcard(), is_constant())
input_is_right = gen_add_inputs(is_constant(), wildcard())

return input_is_left | input_is_right | two_inputs

def check_conv2d(extract):
"""Check if a conv2d is supported by Ethos-N."""
if not ethosn_available():
@@ -289,8 +307,24 @@ def check_resize(extract):

return _ethosn.resize(extract)

def check_add(extract):
"""Check if an addition is supported by Ethos-N."""
if not ethosn_available():
return False
# Do not support scalar constants for now
check_scalar = lambda i: isinstance(i, tvm.relay.Constant) and len(i.data.shape) == 0
if check_scalar(extract.args[0]) or check_scalar(extract.args[1]):
return False

inputs = extract.args[0:2]
if any([isinstance(i, tvm.relay.Constant) for i in inputs]):
extract = _ethosn.ConvertQnnAdd(extract)
return _ethosn.conv2d(extract)
return _ethosn.addition(extract)

return [
("ethos-n.qnn_mul", qnn_mul_pattern(), check_mul),
("ethos-n.qnn_add", qnn_add_pattern(), check_add),
("ethos-n.qnn_conv2d", qnn_conv_pattern(), check_conv2d),
("ethos-n.qnn_avg_pool2d", qnn_avg_pool2d_pattern(), check_avg_pool2d),
("ethos-n.qnn_sigmoid", qnn_sigmoid_pattern(), check_sigmoid),
@@ -332,15 +366,6 @@ def reshape(expr):
return _ethosn.reshape(expr)


@tvm.ir.register_op_attr("qnn.add", "target.ethos-n")
def qnn_add(expr):
"""Check if an addition is supported by Ethos-N."""
if not ethosn_available():
return False

return _ethosn.addition(expr)


@tvm.ir.register_op_attr("qnn.concatenate", "target.ethos-n")
def qnn_concatenate(expr):
"""Check if a concatenate is supported by Ethos-N."""
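For context, this pattern-table entry plugs into TVM's standard BYOC partitioning flow: MergeComposite wraps each matched qnn.add in a composite function, and check_add runs as its predicate, rerouting constant-input additions through ConvertQnnAdd so they are validated as depthwise convolutions. A minimal sketch of that flow, using only upstream TVM passes (the helper name is hypothetical; TVM's own partition_for_ethosn performs the full equivalent):

import tvm
from tvm import relay
from tvm.relay.op.contrib import get_pattern_table

def partition_for_ethosn_add(mod):
    """Hypothetical helper: partition a module using the Ethos-N pattern table."""
    pattern_table = get_pattern_table("ethos-n")
    seq = tvm.transform.Sequential(
        [
            relay.transform.InferType(),
            relay.transform.MergeComposite(pattern_table),
            relay.transform.AnnotateTarget("ethos-n"),
            relay.transform.MergeCompilerRegions(),
            relay.transform.PartitionGraph(),
        ]
    )
    return seq(mod)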
8 changes: 4 additions & 4 deletions src/relay/backend/contrib/ethosn/codegen.cc
@@ -104,9 +104,9 @@ void InferTensorsVisitor::InferCall(const CallNode* cn) {
params.input_info = GetTensorInfo(tensor_table_, call);
err += EthosnAPI::Reshape(call, &params);
tensor_table_[cn->args[0]] = {params.input_info};
} else if (IsEthosnOp(call, "qnn.add")) {
} else if (IsEthosnFunc(call, "ethos-n.qnn_add")) {
AdditionParams params;
err += EthosnAPI::Addition(call, &params);
err += EthosnAPI::Addition(cn->op.as<FunctionNode>()->body, &params);
tensor_table_[cn->args[0]] = {params.lhs_info};
tensor_table_[cn->args[1]] = {params.rhs_info};
} else if (IsEthosnFunc(call, "ethos-n.qnn_sigmoid")) {
@@ -296,7 +296,7 @@ sl::TensorsAndId ConstructNetworkVisitor::HandleCall(const CallNode* cn) {
} else if (IsEthosnOp(call, "reshape")) {
if ((err = MakeReshapeLayer(call, &tensor))) ReportFatalError(call, err);
return MakeOps(tensor);
} else if (IsEthosnOp(call, "qnn.add")) {
} else if (IsEthosnFunc(call, "ethos-n.qnn_add")) {
if ((err = MakeAdditionLayer(call, &tensor))) ReportFatalError(call, err);
return MakeOps(tensor);
} else if (IsEthosnFunc(call, "ethos-n.qnn_sigmoid")) {
@@ -468,7 +468,7 @@ EthosnError ConstructNetworkVisitor::MakeAdditionLayer(const Call& call,
EthosnError ConstructNetworkVisitor::MakeAdditionLayer(const Call& call,
sl::TensorAndId<sl::Operand>* out) {
AdditionParams params;
if (auto err = EthosnAPI::Addition(call, &params)) {
if (auto err = EthosnAPI::Addition(call->op.as<FunctionNode>()->body, &params)) {
return err;
}

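The codegen changes above mirror the Python side: qnn.add no longer reaches the backend as a bare operator but as the body of a composite function, so the visitors now match on IsEthosnFunc(call, "ethos-n.qnn_add") and hand EthosnAPI::Addition the function body. A sketch of the IR shape this assumes (shapes and quantization parameters are illustrative, not from the PR):

import numpy as np
from tvm import relay

# Inner body: qnn.add with one constant input and explicit qparams.
a = relay.var("a", shape=(1, 4, 4, 8), dtype="uint8")
const_input = relay.const(np.full((1, 4, 4, 8), 3, dtype="uint8"))
qparams = [
    relay.const(0.5, "float32"), relay.const(0, "int32"),   # lhs scale/zero point
    relay.const(0.5, "float32"), relay.const(0, "int32"),   # rhs scale/zero point
    relay.const(0.25, "float32"), relay.const(0, "int32"),  # output scale/zero point
]
body = relay.qnn.op.add(a, const_input, *qparams)

# Tag the function as a composite, then call it: the backend matches on the
# attribute and unwraps the function body to reach the qnn.add inside.
composite = relay.Function([a], body).with_attr("Composite", "ethos-n.qnn_add")
x = relay.var("x", shape=(1, 4, 4, 8), dtype="uint8")
call = relay.Call(composite, [x])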
109 changes: 108 additions & 1 deletion src/relay/backend/contrib/ethosn/convert_equivalent.cc
@@ -38,6 +38,20 @@ namespace relay {
namespace contrib {
namespace ethosn {

/*!
* \brief Apply constant folding on an expression.
*
* \param expr The expression to fold.
* \param fold_qnn Whether to fold constants for QNN operations.
* \returns The new folded expression.
*/
Expr FoldConstantExpr(const Expr& expr, bool fold_qnn = true) {
auto mod = IRModule::FromExpr(expr);
mod = transform::FoldConstant(fold_qnn)(mod);
auto entry_func = Downcast<Function>(mod->Lookup("main"));
return expr.as<FunctionNode>() == nullptr ? entry_func->body : entry_func;
}

/*!
* \brief Converts qnn.mul to mathematically equivalent
* qnn.conv2d depthwise operation.
@@ -65,7 +79,9 @@ Expr ConvertQnnMultiply(const Expr& expr) {

const auto* input_constant = input2.as<ConstantNode>();
ICHECK(input_constant) << "Expected ConstantNode but got " << input2->GetTypeKey();
const auto* input_constant_tt = input_constant->checked_type().as<TensorTypeNode>();
Type input_constant_type = input_constant->checked_type();
const auto* input_constant_tt = input_constant_type.as<TensorTypeNode>();
ICHECK(input_constant) << "Expected TensorTypeNode but got " << input_constant_type->GetTypeKey();
int channels = input_constant_tt->shape.back().as<IntImmNode>()->value;

runtime::NDArray input_data = input_constant->data;
@@ -93,6 +109,83 @@ Expr ConvertQnnMultiply(const Expr& expr) {
TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnMultiply")
.set_body_typed(ConvertQnnMultiply);

/*!
* \brief Converts qnn.add to a mathematically equivalent
* qnn.conv2d depthwise operation.
*/
Expr ConvertQnnAdd(const Expr& expr) {
Call call = Downcast<Call>(expr);

Expr input1 = call->args[0];
Expr input2 = call->args[1];
Expr input1_scale = call->args[2];
Expr input1_zero_point = call->args[3];
Expr input2_scale = call->args[4];
Expr input2_zero_point = call->args[5];
// Swap the inputs if the constant is the first input
if (call->args[0]->IsInstance<ConstantNode>()) {
input1 = call->args[1];
input2 = call->args[0];
input1_scale = call->args[4];
input1_zero_point = call->args[5];
input2_scale = call->args[2];
input2_zero_point = call->args[3];
}
Expr output_scale = call->args[6];
Expr output_zero_point = call->args[7];

const auto* input_constant = input2.as<ConstantNode>();
ICHECK(input_constant) << "Expected ConstantNode but got " << input2->GetTypeKey();
Type input_constant_type = input_constant->checked_type();
const auto* input_constant_tt = input_constant_type.as<TensorTypeNode>();
ICHECK(input_constant) << "Expected TensorTypeNode but got " << input_constant_type->GetTypeKey();
int channels = input_constant_tt->shape.back().as<IntImmNode>()->value;

// Create the identity kernel. The kernel data is constructed such that it produces an identity
// operation in the quantized space. Therefore, the input is not scaled in any way, which allows
// us to later use the bias to perform the addition.
float input_scale_value = GetScalarFromConstant<float>(input1_scale);
float output_scale_value = GetScalarFromConstant<float>(output_scale);
float identity_kernel_scale_ub = std::min(output_scale_value / input_scale_value, 1.f);
float identity_kernel_scale_lb = (1.f / 255.f);
float identity_kernel_scale_target = (identity_kernel_scale_ub + identity_kernel_scale_lb) / 2.f;
float identity_kernel_scale_recip_rounded = std::round(1.f / identity_kernel_scale_target);
float identity_kernel_scale_value = 1.f / identity_kernel_scale_recip_rounded;
Constant identity_kernel_scale =
MakeConstantScalar(DataType::Float(32), identity_kernel_scale_value);
Constant identity_kernel_zero_point = MakeConstantScalar(DataType::Int(32), 0);
float identity_kernel_quantized_data = identity_kernel_scale_recip_rounded;
std::vector<uint8_t> identity_kernel_data(channels,
static_cast<uint8_t>(identity_kernel_quantized_data));
Constant identity_kernel =
MakeConstantTensor(input_constant_tt->dtype, {1, 1, channels, 1}, identity_kernel_data);

// Calculate the bias; this is where the addition happens. The bias values are calculated by
// scaling the constant input to input_scale * identity_kernel_scale.
Constant bias_scale =
MakeConstantScalar(DataType::Float(32), input_scale_value * identity_kernel_scale_value);
Constant bias_zero_point = MakeConstantScalar(DataType::Int(32), 0);
Expr requantize_bias =
qnn::MakeRequantize(input2, input2_scale, input2_zero_point, bias_scale, bias_zero_point, -1,
"None", "None", DataType::Int(32));
Expr reshape_bias = MakeReshape(requantize_bias, {channels});
Constant bias = Downcast<Constant>(FoldConstantExpr(reshape_bias));

// Make depthwise conv2d operation
Expr conv2d =
qnn::MakeQnnConv2D(input1, identity_kernel, input1_zero_point, identity_kernel_zero_point,
input1_scale, identity_kernel_scale, {1, 1}, {0, 0, 0, 0}, {1, 1},
channels, channels, {1, 1}, "NHWC", "HWOI", "NHWC", DataType::Int(32));
Expr bias_add = MakeBiasAdd(conv2d, bias, 3);
Expr requantize =
qnn::MakeRequantize(bias_add, input1_scale, input1_zero_point, output_scale,
output_zero_point, -1, "None", "None", input_constant_tt->dtype);

return InferType(requantize);
}

TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnAdd").set_body_typed(ConvertQnnAdd);

class ConvertEquivalentsMutator : public MixedModeMutator {
public:
Expr Rewrite_(const CallNode* pre, const Expr& post) override {
@@ -108,11 +201,25 @@ class ConvertEquivalentsMutator : public MixedModeMutator {
Expr new_func_body = ConvertQnnMultiply(func->body);
new_func = WithFields(func, func->params, new_func_body);
new_func = WithAttr(std::move(new_func), attr::kComposite, String("ethos-n.qnn_conv2d"));
} else if (composite_name == "ethos-n.qnn_add" && CheckCanConvertAdd(func->body)) {
Expr new_func_body = ConvertQnnAdd(func->body);
new_func = WithFields(func, func->params, new_func_body);
new_func = WithAttr(std::move(new_func), attr::kComposite, String("ethos-n.qnn_conv2d"));
}

Call new_call = WithFields(call, new_func);
return Downcast<Expr>(new_call);
}

private:
/*!
* \brief Check whether the add can be converted to a depthwise convolution,
* or whether it should be offloaded as a normal addition operation.
*/
bool CheckCanConvertAdd(const Expr& expr) {
Call call = Downcast<Call>(expr);
return call->args[0]->IsInstance<ConstantNode>() || call->args[1]->IsInstance<ConstantNode>();
}
};

tvm::transform::Pass ConvertEquivalents() {
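The identity-kernel construction in ConvertQnnAdd is worth a worked check: the kernel scale is chosen so that its reciprocal is an integer, and the kernel data is exactly that reciprocal, so every weight equals exactly 1.0 in real terms and the whole addition is carried by the bias. A quick numeric sketch of the derivation (values are illustrative, not from the PR):

import numpy as np

input_scale, output_scale = 0.5, 0.25

# Mirror the scale selection above: pick a kernel scale whose reciprocal
# rounds to an integer, so kernel_value * kernel_scale == 1 exactly.
ub = min(output_scale / input_scale, 1.0)  # 0.5
lb = 1.0 / 255.0                           # lower bound on the scale
target = (ub + lb) / 2.0                   # ~0.25196
recip = np.round(1.0 / target)             # 4.0
kernel_scale = 1.0 / recip                 # 0.25
kernel_value = recip                       # quantized kernel data = 4

assert kernel_value * kernel_scale == 1.0  # exact identity in real terms

# The constant input is requantized to input_scale * kernel_scale and becomes
# the bias, so the depthwise conv passes the input through and the bias adds.
bias_scale = input_scale * kernel_scale    # 0.125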
3 changes: 2 additions & 1 deletion tests/python/contrib/test_ethosn/infrastructure.py
@@ -83,7 +83,8 @@ def make_module(func, params):

def make_ethosn_composite(ethosn_expr, name):
vars = relay.analysis.free_vars(ethosn_expr)
func = relay.Function([relay.Var("a")], ethosn_expr)
inner_vars = [relay.Var(v.name_hint, v.type_annotation) for v in vars]
func = relay.Function(inner_vars, ethosn_expr)
func = func.with_attr("Composite", name)
call = relay.Call(func, vars)
return call
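
This fix addresses a latent arity bug: the old helper bound every composite body to a single untyped parameter "a", which breaks multi-input composites such as the new add pattern. A small sketch of the corrected behaviour (a plain add stands in for the composite body for brevity):

from tvm import relay

x = relay.var("x", shape=(1, 4), dtype="uint8")
y = relay.var("y", shape=(1, 4), dtype="uint8")
expr = relay.add(x, y)
free = relay.analysis.free_vars(expr)  # [x, y]

# Before the fix the helper built relay.Function([relay.Var("a")], expr):
# one made-up parameter regardless of arity, so relay.Call(func, free)
# passed two arguments to a one-parameter function.
inner = [relay.Var(v.name_hint, v.type_annotation) for v in free]
func = relay.Function(inner, expr).with_attr("Composite", "ethos-n.qnn_add")
call = relay.Call(func, free)  # arity and types now line up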