[ETHOSN] Support conversion of add to depthwise (apache#12531)
In a similar fashion to the conversion of mul to depthwise, this commit
converts add to a depthwise convolution when one of its inputs is a
constant of shape [1, ..., n]. If neither input is a constant, the add is
offloaded as a regular addition, as before.

The addition tests have been improved to use pytest features.
lhutton1 authored and xinetzone committed Nov 25, 2022
1 parent f562d40 commit 0e6ae97
Showing 7 changed files with 377 additions and 101 deletions.
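The equivalence the commit relies on is easiest to see in floating point: adding a constant of shape [1, ..., n] is the same as running a 1x1 depthwise convolution with an all-ones kernel and folding the constant into the per-channel bias. A minimal NumPy sketch of that idea (illustrative only; shapes and NHWC layout are assumed, and the real conversion below operates on quantized tensors):

import numpy as np

x = np.random.rand(1, 4, 4, 8).astype("float32")  # NHWC activation
c = np.random.rand(1, 1, 1, 8).astype("float32")  # constant of shape [1, ..., n]

# Plain broadcast addition.
add_out = x + c

# A 1x1 depthwise conv with an all-ones (identity) kernel is a per-channel
# no-op, so the whole addition can live in the bias term.
identity_kernel = np.ones(8, dtype="float32")  # one weight per channel
bias = c.reshape(8)
conv_out = x * identity_kernel + bias          # 1x1 depthwise == per-channel multiply

np.testing.assert_allclose(add_out, conv_out, rtol=1e-6)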
43 changes: 34 additions & 9 deletions python/tvm/relay/op/contrib/ethosn.py
@@ -215,6 +215,24 @@ def qnn_mul_pattern():
         input_is_right = gen_mul_inputs(is_constant(), wildcard())
         return input_is_left | input_is_right
 
+    def qnn_add_pattern():
+        add_op = is_op("qnn.add")
+        gen_add_inputs = lambda x, y: add_op(
+            x,
+            y,
+            is_constant(),
+            is_constant(),
+            is_constant(),
+            is_constant(),
+            is_constant(),
+            is_constant(),
+        )
+        two_inputs = gen_add_inputs(wildcard(), wildcard())
+        input_is_left = gen_add_inputs(wildcard(), is_constant())
+        input_is_right = gen_add_inputs(is_constant(), wildcard())
+
+        return input_is_left | input_is_right | two_inputs
+
     def check_conv2d(extract):
         """Check if a conv2d is supported by Ethos-N."""
         if not ethosn_available():
@@ -289,8 +307,24 @@ def check_resize(extract):
 
         return _ethosn.resize(extract)
 
+    def check_add(extract):
+        """Check if an addition is supported by Ethos-N."""
+        if not ethosn_available():
+            return False
+        # Do not support scalar constants for now
+        check_scalar = lambda i: isinstance(i, tvm.relay.Constant) and len(i.data.shape) == 0
+        if check_scalar(extract.args[0]) or check_scalar(extract.args[1]):
+            return False
+
+        inputs = extract.args[0:2]
+        if any([isinstance(i, tvm.relay.Constant) for i in inputs]):
+            extract = _ethosn.ConvertQnnAdd(extract)
+            return _ethosn.conv2d(extract)
+        return _ethosn.addition(extract)
+
     return [
         ("ethos-n.qnn_mul", qnn_mul_pattern(), check_mul),
+        ("ethos-n.qnn_add", qnn_add_pattern(), check_add),
         ("ethos-n.qnn_conv2d", qnn_conv_pattern(), check_conv2d),
         ("ethos-n.qnn_avg_pool2d", qnn_avg_pool2d_pattern(), check_avg_pool2d),
         ("ethos-n.qnn_sigmoid", qnn_sigmoid_pattern(), check_sigmoid),
@@ -332,15 +366,6 @@ def reshape(expr):
     return _ethosn.reshape(expr)
 
 
-@tvm.ir.register_op_attr("qnn.add", "target.ethos-n")
-def qnn_add(expr):
-    """Check if an addition is supported by Ethos-N."""
-    if not ethosn_available():
-        return False
-
-    return _ethosn.addition(expr)
-
-
 @tvm.ir.register_op_attr("qnn.concatenate", "target.ethos-n")
 def qnn_concatenate(expr):
     """Check if a concatenate is supported by Ethos-N."""
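Taken together, the pattern, check, and table entry above let the partitioner offload a constant-input add as a depthwise conv2d. A hedged usage sketch (partition_for_ethosn and the qnn.add keyword names are assumed from the TVM contrib API of this era):

import numpy as np
import tvm
from tvm import relay
from tvm.relay.op.contrib.ethosn import partition_for_ethosn

x = relay.var("x", shape=(1, 4, 4, 8), dtype="uint8")
constant = relay.const(np.full((1, 1, 1, 8), 3, dtype="uint8"))
out = relay.qnn.op.add(
    x,
    constant,
    lhs_scale=relay.const(0.5, "float32"),
    lhs_zero_point=relay.const(0, "int32"),
    rhs_scale=relay.const(0.5, "float32"),
    rhs_zero_point=relay.const(0, "int32"),
    output_scale=relay.const(0.5, "float32"),
    output_zero_point=relay.const(0, "int32"),
)
mod = tvm.IRModule.from_expr(relay.Function([x], out))

# With the Ethos-N driver stack available, the qnn_add pattern matches this
# call, check_add sees the constant input and routes it through ConvertQnnAdd
# as a depthwise conv2d; with two tensor inputs it would be offloaded as a
# plain addition instead.
partitioned = partition_for_ethosn(mod)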
8 changes: 4 additions & 4 deletions src/relay/backend/contrib/ethosn/codegen.cc
@@ -104,9 +104,9 @@ void InferTensorsVisitor::InferCall(const CallNode* cn) {
     params.input_info = GetTensorInfo(tensor_table_, call);
     err += EthosnAPI::Reshape(call, &params);
     tensor_table_[cn->args[0]] = {params.input_info};
-  } else if (IsEthosnOp(call, "qnn.add")) {
+  } else if (IsEthosnFunc(call, "ethos-n.qnn_add")) {
     AdditionParams params;
-    err += EthosnAPI::Addition(call, &params);
+    err += EthosnAPI::Addition(cn->op.as<FunctionNode>()->body, &params);
     tensor_table_[cn->args[0]] = {params.lhs_info};
     tensor_table_[cn->args[1]] = {params.rhs_info};
   } else if (IsEthosnFunc(call, "ethos-n.qnn_sigmoid")) {
@@ -296,7 +296,7 @@ sl::TensorsAndId ConstructNetworkVisitor::HandleCall(const CallNode* cn) {
   } else if (IsEthosnOp(call, "reshape")) {
     if ((err = MakeReshapeLayer(call, &tensor))) ReportFatalError(call, err);
     return MakeOps(tensor);
-  } else if (IsEthosnOp(call, "qnn.add")) {
+  } else if (IsEthosnFunc(call, "ethos-n.qnn_add")) {
     if ((err = MakeAdditionLayer(call, &tensor))) ReportFatalError(call, err);
     return MakeOps(tensor);
   } else if (IsEthosnFunc(call, "ethos-n.qnn_sigmoid")) {
@@ -468,7 +468,7 @@ EthosnError ConstructNetworkVisitor::MakeReshapeLayer(const Call& call,
 EthosnError ConstructNetworkVisitor::MakeAdditionLayer(const Call& call,
                                                        sl::TensorAndId<sl::Operand>* out) {
   AdditionParams params;
-  if (auto err = EthosnAPI::Addition(call, &params)) {
+  if (auto err = EthosnAPI::Addition(call->op.as<FunctionNode>()->body, &params)) {
     return err;
   }
 
109 changes: 108 additions & 1 deletion src/relay/backend/contrib/ethosn/convert_equivalent.cc
@@ -38,6 +38,20 @@ namespace relay {
 namespace contrib {
 namespace ethosn {
 
+/*!
+ * \brief Apply constant folding on an expression.
+ *
+ * \param expr The expression to fold.
+ * \param fold_qnn Whether to fold constants for QNN operations.
+ * \returns The new folded expression.
+ */
+Expr FoldConstantExpr(const Expr& expr, bool fold_qnn = true) {
+  auto mod = IRModule::FromExpr(expr);
+  mod = transform::FoldConstant(fold_qnn)(mod);
+  auto entry_func = Downcast<Function>(mod->Lookup("main"));
+  return expr.as<FunctionNode>() == nullptr ? entry_func->body : entry_func;
+}
+
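A rough Python analogue of this helper, for readers more familiar with the Python API (the fold_qnn keyword on relay.transform.FoldConstant is assumed to match the C++ flag used here):

import tvm
from tvm import relay

def fold_constant_expr(expr, fold_qnn=True):
    """Fold constants in a free-standing expression, mirroring FoldConstantExpr."""
    mod = tvm.IRModule.from_expr(expr)
    mod = relay.transform.FoldConstant(fold_qnn=fold_qnn)(mod)
    entry_func = mod["main"]
    # Mirror the C++ helper: return just the body unless a whole function was passed in.
    return entry_func if isinstance(expr, relay.Function) else entry_func.body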
 /*!
  * \brief Converts qnn.mul to mathematically equivalent
  * qnn.conv2d depthwise operation.
@@ -65,7 +79,9 @@ Expr ConvertQnnMultiply(const Expr& expr) {
 
   const auto* input_constant = input2.as<ConstantNode>();
   ICHECK(input_constant) << "Expected ConstantNode but got " << input2->GetTypeKey();
-  const auto* input_constant_tt = input_constant->checked_type().as<TensorTypeNode>();
+  Type input_constant_type = input_constant->checked_type();
+  const auto* input_constant_tt = input_constant_type.as<TensorTypeNode>();
+  ICHECK(input_constant_tt) << "Expected TensorTypeNode but got " << input_constant_type->GetTypeKey();
   int channels = input_constant_tt->shape.back().as<IntImmNode>()->value;
 
   runtime::NDArray input_data = input_constant->data;
@@ -93,6 +109,83 @@
 TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnMultiply")
     .set_body_typed(ConvertQnnMultiply);
 
+/*!
+ * \brief Converts qnn.add to a mathematically equivalent
+ * qnn.conv2d depthwise operation.
+ */
+Expr ConvertQnnAdd(const Expr& expr) {
+  Call call = Downcast<Call>(expr);
+
+  Expr input1 = call->args[0];
+  Expr input2 = call->args[1];
+  Expr input1_scale = call->args[2];
+  Expr input1_zero_point = call->args[3];
+  Expr input2_scale = call->args[4];
+  Expr input2_zero_point = call->args[5];
+  // Reverse the inputs if the constant is the first input
+  if (call->args[0]->IsInstance<ConstantNode>()) {
+    input1 = call->args[1];
+    input2 = call->args[0];
+    input1_scale = call->args[4];
+    input1_zero_point = call->args[5];
+    input2_scale = call->args[2];
+    input2_zero_point = call->args[3];
+  }
+  Expr output_scale = call->args[6];
+  Expr output_zero_point = call->args[7];
+
+  const auto* input_constant = input2.as<ConstantNode>();
+  ICHECK(input_constant) << "Expected ConstantNode but got " << input2->GetTypeKey();
+  Type input_constant_type = input_constant->checked_type();
+  const auto* input_constant_tt = input_constant_type.as<TensorTypeNode>();
+  ICHECK(input_constant_tt) << "Expected TensorTypeNode but got " << input_constant_type->GetTypeKey();
+  int channels = input_constant_tt->shape.back().as<IntImmNode>()->value;
+
+  // Create the identity kernel. The kernel data is constructed such that it produces an identity
+  // operation in the quantized space. Therefore, the input is not scaled in any way, which allows
+  // us to later use the bias to perform the addition.
+  float input_scale_value = GetScalarFromConstant<float>(input1_scale);
+  float output_scale_value = GetScalarFromConstant<float>(output_scale);
+  float identity_kernel_scale_ub = std::min(output_scale_value / input_scale_value, 1.f);
+  float identity_kernel_scale_lb = (1.f / 255.f);
+  float identity_kernel_scale_target = (identity_kernel_scale_ub + identity_kernel_scale_lb) / 2.f;
+  float identity_kernel_scale_recip_rounded = std::round(1.f / identity_kernel_scale_target);
+  float identity_kernel_scale_value = 1.f / identity_kernel_scale_recip_rounded;
+  Constant identity_kernel_scale =
+      MakeConstantScalar(DataType::Float(32), identity_kernel_scale_value);
+  Constant identity_kernel_zero_point = MakeConstantScalar(DataType::Int(32), 0);
+  float identity_kernel_quantized_data = identity_kernel_scale_recip_rounded;
+  std::vector<uint8_t> identity_kernel_data(channels,
+                                            static_cast<uint8_t>(identity_kernel_quantized_data));
+  Constant identity_kernel =
+      MakeConstantTensor(input_constant_tt->dtype, {1, 1, channels, 1}, identity_kernel_data);
+
+  // Calculate the bias; this is where the addition happens. The bias values are calculated by
+  // scaling the constant input to input_scale * identity_kernel_scale.
+  Constant bias_scale =
+      MakeConstantScalar(DataType::Float(32), input_scale_value * identity_kernel_scale_value);
+  Constant bias_zero_point = MakeConstantScalar(DataType::Int(32), 0);
+  Expr requantize_bias =
+      qnn::MakeRequantize(input2, input2_scale, input2_zero_point, bias_scale, bias_zero_point, -1,
+                          "None", "None", DataType::Int(32));
+  Expr reshape_bias = MakeReshape(requantize_bias, {channels});
+  Constant bias = Downcast<Constant>(FoldConstantExpr(reshape_bias));
+
+  // Make the depthwise conv2d operation
+  Expr conv2d =
+      qnn::MakeQnnConv2D(input1, identity_kernel, input1_zero_point, identity_kernel_zero_point,
+                         input1_scale, identity_kernel_scale, {1, 1}, {0, 0, 0, 0}, {1, 1},
+                         channels, channels, {1, 1}, "NHWC", "HWOI", "NHWC", DataType::Int(32));
+  Expr bias_add = MakeBiasAdd(conv2d, bias, 3);
+  Expr requantize =
+      qnn::MakeRequantize(bias_add, input1_scale, input1_zero_point, output_scale,
+                          output_zero_point, -1, "None", "None", input_constant_tt->dtype);
+
+  return InferType(requantize);
+}
+
+TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnAdd").set_body_typed(ConvertQnnAdd);
+
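To make the identity-kernel scale selection above concrete, a worked example in Python with assumed quantization parameters input_scale = 0.5 and output_scale = 0.25:

input_scale = 0.5
output_scale = 0.25

ub = min(output_scale / input_scale, 1.0)  # upper bound: 0.5
lb = 1.0 / 255.0                           # lower bound: ~0.0039
target = (ub + lb) / 2.0                   # ~0.2520
kernel_q = round(1.0 / target)             # quantized kernel value: 4
kernel_scale = 1.0 / kernel_q              # kernel scale: 0.25

# The kernel's real value is kernel_q * kernel_scale == 1.0 exactly, so the
# depthwise convolution is an identity in real terms and the constant input
# can be folded entirely into the bias.
assert kernel_q * kernel_scale == 1.0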
 class ConvertEquivalentsMutator : public MixedModeMutator {
  public:
   Expr Rewrite_(const CallNode* pre, const Expr& post) override {
@@ -108,11 +201,25 @@ class ConvertEquivalentsMutator : public MixedModeMutator {
       Expr new_func_body = ConvertQnnMultiply(func->body);
       new_func = WithFields(func, func->params, new_func_body);
       new_func = WithAttr(std::move(new_func), attr::kComposite, String("ethos-n.qnn_conv2d"));
+    } else if (composite_name == "ethos-n.qnn_add" && CheckCanConvertAdd(func->body)) {
+      Expr new_func_body = ConvertQnnAdd(func->body);
+      new_func = WithFields(func, func->params, new_func_body);
+      new_func = WithAttr(std::move(new_func), attr::kComposite, String("ethos-n.qnn_conv2d"));
     }
 
     Call new_call = WithFields(call, new_func);
     return Downcast<Expr>(new_call);
   }
 
+ private:
+  /*!
+   * \brief Check whether add can be converted to depthwise, or whether
+   * it should be offloaded as a normal add operation.
+   */
+  bool CheckCanConvertAdd(const Expr& expr) {
+    Call call = Downcast<Call>(expr);
+    return call->args[0]->IsInstance<ConstantNode>() || call->args[1]->IsInstance<ConstantNode>();
+  }
 };
 
 tvm::transform::Pass ConvertEquivalents() {
3 changes: 2 additions & 1 deletion tests/python/contrib/test_ethosn/infrastructure.py
@@ -83,7 +83,8 @@ def make_module(func, params):
 
 def make_ethosn_composite(ethosn_expr, name):
     vars = relay.analysis.free_vars(ethosn_expr)
-    func = relay.Function([relay.Var("a")], ethosn_expr)
+    inner_vars = [relay.Var(v.name_hint, v.type_annotation) for v in vars]
+    func = relay.Function(inner_vars, ethosn_expr)
     func = func.with_attr("Composite", name)
     call = relay.Call(func, vars)
     return call
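This fix matters because a two-input qnn.add composite has two free variables, so binding a single untyped variable "a" no longer suffices. A hedged sketch of what the fixed helper now produces (quantization parameters are placeholder values):

from tvm import relay

a = relay.var("a", shape=(1, 4, 4, 8), dtype="uint8")
b = relay.var("b", shape=(1, 4, 4, 8), dtype="uint8")
add = relay.qnn.op.add(
    a, b,
    relay.const(0.5, "float32"), relay.const(0, "int32"),
    relay.const(0.5, "float32"), relay.const(0, "int32"),
    relay.const(0.5, "float32"), relay.const(0, "int32"),
)

# The composite function now has one typed parameter per free variable of the
# wrapped expression, so both "a" and "b" are bound correctly.
composite_call = make_ethosn_composite(add, "ethos-n.qnn_add")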
(Diff for the remaining 3 changed files, covering the updated addition tests, not shown.)