[ETHOSN] Support conversion of add to depthwise
In a similar fashion to the conversion of mul to depthwise, this commit
converts add to a depthwise convolution when one input is a constant of
shape [1, ..., n]. If neither input is a constant, the add is offloaded
as a regular addition, as before.

The addition tests have been improved to use pytest features.

Change-Id: I93e7b7619736767992e70aaa6e60e0d7a2875f76
lhutton1 committed Aug 22, 2022
1 parent 78b1dc2 commit 7e65300
Showing 6 changed files with 376 additions and 100 deletions.
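
For context, the rewrite targets qnn.add calls where exactly one input is a channel-wise constant. Below is a minimal sketch of such a graph using TVM's Relay QNN builder API; the shapes, scales, and zero points are illustrative assumptions, not values from this commit:

import numpy as np
from tvm import relay

# One variable input plus a constant of shape [1, ..., n]: the case
# this commit rewrites into an identity depthwise qnn.conv2d + bias.
x = relay.var("x", shape=(1, 4, 4, 8), dtype="uint8")
c = relay.const(np.ones((1, 1, 1, 8), dtype="uint8"))
out = relay.qnn.op.add(
    lhs=x,
    rhs=c,
    lhs_scale=relay.const(0.02, "float32"),
    lhs_zero_point=relay.const(0, "int32"),
    rhs_scale=relay.const(0.01, "float32"),
    rhs_zero_point=relay.const(0, "int32"),
    output_scale=relay.const(0.03, "float32"),
    output_zero_point=relay.const(0, "int32"),
)

If both inputs are variables, the pattern still matches (two_inputs below), but check_add offloads it as a regular Ethos-N addition instead of converting it.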
43 changes: 34 additions & 9 deletions python/tvm/relay/op/contrib/ethosn.py
@@ -191,6 +191,24 @@ def qnn_mul_pattern():
input_is_right = gen_mul_inputs(is_constant(), wildcard())
return input_is_left | input_is_right

def qnn_add_pattern():
add_op = is_op("qnn.add")
gen_add_inputs = lambda x, y: add_op(
x,
y,
is_constant(),
is_constant(),
is_constant(),
is_constant(),
is_constant(),
is_constant(),
)
two_inputs = gen_add_inputs(wildcard(), wildcard())
input_is_left = gen_add_inputs(wildcard(), is_constant())
input_is_right = gen_add_inputs(is_constant(), wildcard())

return input_is_left | input_is_right | two_inputs

def check_conv2d(extract):
"""Check if a conv2d is supported by Ethos-N."""
if not ethosn_available():
@@ -258,8 +276,24 @@ def check_requantize(extract):

return _ethosn.requantize(extract)

def check_add(extract):
"""Check if an addition is supported by Ethos-N."""
if not ethosn_available():
return False
# Do not support scalar constants for now
check_scalar = lambda i: isinstance(i, tvm.relay.Constant) and len(i.data.shape) == 0
if check_scalar(extract.args[0]) or check_scalar(extract.args[1]):
return False

inputs = extract.args[0:2]
if any([isinstance(i, tvm.relay.Constant) for i in inputs]):
extract = _ethosn.ConvertQnnAdd(extract)
return _ethosn.conv2d(extract)
return _ethosn.addition(extract)

return [
("ethos-n.qnn_mul", qnn_mul_pattern(), check_mul),
("ethos-n.qnn_add", qnn_add_pattern(), check_add),
("ethos-n.qnn_conv2d", qnn_conv_pattern(), check_conv2d),
("ethos-n.qnn_avg_pool2d", qnn_avg_pool2d_pattern(), check_avg_pool2d),
("ethos-n.qnn_sigmoid", qnn_sigmoid_pattern(), check_sigmoid),
@@ -300,15 +334,6 @@ def reshape(expr):
return _ethosn.reshape(expr)


@tvm.ir.register_op_attr("qnn.add", "target.ethos-n")
def qnn_add(expr):
"""Check if an addition is supported by Ethos-N."""
if not ethosn_available():
return False

return _ethosn.addition(expr)


@tvm.ir.register_op_attr("qnn.concatenate", "target.ethos-n")
def qnn_concatenate(expr):
"""Check if a concatenate is supported by Ethos-N."""
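A quick sketch of how the scalar guard in check_add behaves; a hedged illustration with made-up constants, not code from the commit:

import numpy as np
import tvm
from tvm import relay

# Mirrors check_add's guard: 0-d constants are rejected for now,
# while channel-shaped constants are candidates for conversion.
def is_scalar(arg):
    return isinstance(arg, tvm.relay.Constant) and len(arg.data.shape) == 0

assert is_scalar(relay.const(np.uint8(3)))                          # shape ()
assert not is_scalar(relay.const(np.ones((1, 1, 1, 8), "uint8")))   # shape [1, 1, 1, 8]
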
8 changes: 4 additions & 4 deletions src/relay/backend/contrib/ethosn/codegen.cc
@@ -103,9 +103,9 @@ void InferTensorsVisitor::InferCall(const CallNode* cn) {
params.input_info = GetTensorInfo(tensor_table_, call);
err += EthosnAPI::Reshape(call, &params);
tensor_table_[cn->args[0]] = {params.input_info};
} else if (IsEthosnOp(call, "qnn.add")) {
} else if (IsEthosnFunc(call, "ethos-n.qnn_add")) {
AdditionParams params;
err += EthosnAPI::Addition(call, &params);
err += EthosnAPI::Addition(cn->op.as<FunctionNode>()->body, &params);
tensor_table_[cn->args[0]] = {params.lhs_info};
tensor_table_[cn->args[1]] = {params.rhs_info};
} else if (IsEthosnFunc(call, "ethos-n.qnn_sigmoid")) {
@@ -291,7 +291,7 @@ sl::TensorsAndId ConstructNetworkVisitor::HandleCall(const CallNode* cn) {
} else if (IsEthosnOp(call, "reshape")) {
if ((err = MakeReshapeLayer(call, &tensor))) ReportFatalError(call, err);
return MakeOps(tensor);
} else if (IsEthosnOp(call, "qnn.add")) {
} else if (IsEthosnFunc(call, "ethos-n.qnn_add")) {
if ((err = MakeAdditionLayer(call, &tensor))) ReportFatalError(call, err);
return MakeOps(tensor);
} else if (IsEthosnFunc(call, "ethos-n.qnn_sigmoid")) {
@@ -460,7 +460,7 @@ EthosnError ConstructNetworkVisitor::MakeReshapeLayer(const Call& call,
EthosnError ConstructNetworkVisitor::MakeAdditionLayer(const Call& call,
sl::TensorAndId<sl::Operand>* out) {
AdditionParams params;
if (auto err = EthosnAPI::Addition(call, &params)) {
if (auto err = EthosnAPI::Addition(call->op.as<FunctionNode>()->body, &params)) {
return err;
}

109 changes: 108 additions & 1 deletion src/relay/backend/contrib/ethosn/convert_equivalent.cc
@@ -38,6 +38,20 @@ namespace relay {
namespace contrib {
namespace ethosn {

/*!
* \brief Apply constant folding on an expression.
*
* \param expr The expression to fold.
* \param fold_qnn Whether to fold constants for QNN operations.
* \returns The new folded expression.
*/
Expr FoldConstantExpr(const Expr& expr, bool fold_qnn = true) {
auto mod = IRModule::FromExpr(expr);
mod = transform::FoldConstant(fold_qnn)(mod);
auto entry_func = Downcast<Function>(mod->Lookup("main"));
return expr.as<FunctionNode>() == nullptr ? entry_func->body : entry_func;
}

/*!
* \brief Converts qnn.mul to mathematically equivalent
* qnn.conv2d depthwise operation.
@@ -65,7 +79,9 @@ Expr ConvertQnnMultiply(const Expr& expr) {

const auto* input_constant = input2.as<ConstantNode>();
ICHECK(input_constant) << "Expected ConstantNode but got " << input2->GetTypeKey();
const auto* input_constant_tt = input_constant->checked_type().as<TensorTypeNode>();
Type input_constant_type = input_constant->checked_type();
const auto* input_constant_tt = input_constant_type.as<TensorTypeNode>();
ICHECK(input_constant) << "Expected TensorTypeNode but got " << input_constant_type->GetTypeKey();
int channels = input_constant_tt->shape.back().as<IntImmNode>()->value;

runtime::NDArray input_data = input_constant->data;
@@ -93,6 +109,83 @@ Expr ConvertQnnMultiply(const Expr& expr) {
TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnMultiply")
.set_body_typed(ConvertQnnMultiply);

/*!
* \brief Converts qnn.add to a mathematically equivalent
* qnn.conv2d depthwise operation.
*/
Expr ConvertQnnAdd(const Expr& expr) {
Call call = Downcast<Call>(expr);

Expr input1 = call->args[0];
Expr input2 = call->args[1];
Expr input1_scale = call->args[2];
Expr input1_zero_point = call->args[3];
Expr input2_scale = call->args[4];
Expr input2_zero_point = call->args[5];
// Reverse the inputs if the constant is first input
if (call->args[0]->IsInstance<ConstantNode>()) {
input1 = call->args[1];
input2 = call->args[0];
input1_scale = call->args[4];
input1_zero_point = call->args[5];
input2_scale = call->args[2];
input2_zero_point = call->args[3];
}
Expr output_scale = call->args[6];
Expr output_zero_point = call->args[7];

const auto* input_constant = input2.as<ConstantNode>();
ICHECK(input_constant) << "Expected ConstantNode but got " << input2->GetTypeKey();
Type input_constant_type = input_constant->checked_type();
const auto* input_constant_tt = input_constant_type.as<TensorTypeNode>();
ICHECK(input_constant) << "Expected TensorTypeNode but got " << input_constant_type->GetTypeKey();
int channels = input_constant_tt->shape.back().as<IntImmNode>()->value;

// Create the identity kernel. The kernel data is constructed such that it produces an identity
// operation in the quantized space. Therefore, the input is not scaled in any way, which allows
// us to later use the bias to perform the addition.
float input_scale_value = GetScalarFromConstant<float>(input1_scale);
float output_scale_value = GetScalarFromConstant<float>(output_scale);
float identity_kernel_scale_ub = std::min(output_scale_value / input_scale_value, 1.f);
float identity_kernel_scale_lb = (1.f / 255.f);
float identity_kernel_scale_target = (identity_kernel_scale_ub + identity_kernel_scale_lb) / 2.f;
float identity_kernel_scale_recip_rounded = std::round(1.f / identity_kernel_scale_target);
float identity_kernel_scale_value = 1.f / identity_kernel_scale_recip_rounded;
Constant identity_kernel_scale =
MakeConstantScalar(DataType::Float(32), identity_kernel_scale_value);
Constant identity_kernel_zero_point = MakeConstantScalar(DataType::Int(32), 0);
float identity_kernel_quantized_data = identity_kernel_scale_recip_rounded;
std::vector<uint8_t> identity_kernel_data(channels,
static_cast<uint8_t>(identity_kernel_quantized_data));
Constant identity_kernel =
MakeConstantTensor(input_constant_tt->dtype, {1, 1, channels, 1}, identity_kernel_data);

// Calculate the bias; this is where the addition happens. The bias values are calculated by
// scaling the constant input to input_scale * identity_kernel_scale.
Constant bias_scale =
MakeConstantScalar(DataType::Float(32), input_scale_value * identity_kernel_scale_value);
Constant bias_zero_point = MakeConstantScalar(DataType::Int(32), 0);
Expr requantize_bias =
qnn::MakeRequantize(input2, input2_scale, input2_zero_point, bias_scale, bias_zero_point, -1,
"None", "None", DataType::Int(32));
Expr reshape_bias = MakeReshape(requantize_bias, {channels});
Constant bias = Downcast<Constant>(FoldConstantExpr(reshape_bias));

// Make depthwise conv2d operation
Expr conv2d =
qnn::MakeQnnConv2D(input1, identity_kernel, input1_zero_point, identity_kernel_zero_point,
input1_scale, identity_kernel_scale, {1, 1}, {0, 0, 0, 0}, {1, 1},
channels, channels, {1, 1}, "NHWC", "HWOI", "NHWC", DataType::Int(32));
Expr bias_add = MakeBiasAdd(conv2d, bias, 3);
Expr requantize =
qnn::MakeRequantize(bias_add, input1_scale, input1_zero_point, output_scale,
output_zero_point, -1, "None", "None", input_constant_tt->dtype);

return InferType(requantize);
}

TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnAdd").set_body_typed(ConvertQnnAdd);

class ConvertEquivalentsMutator : public MixedModeMutator {
public:
Expr Rewrite_(const CallNode* pre, const Expr& post) override {
@@ -108,11 +201,25 @@ class ConvertEquivalentsMutator : public MixedModeMutator {
Expr new_func_body = ConvertQnnMultiply(func->body);
new_func = WithFields(func, func->params, new_func_body);
new_func = WithAttr(std::move(new_func), attr::kComposite, String("ethos-n.qnn_conv2d"));
} else if (composite_name == "ethos-n.qnn_add" && CheckCanConvertAdd(func->body)) {
Expr new_func_body = ConvertQnnAdd(func->body);
new_func = WithFields(func, func->params, new_func_body);
new_func = WithAttr(std::move(new_func), attr::kComposite, String("ethos-n.qnn_conv2d"));
}

Call new_call = WithFields(call, new_func);
return Downcast<Expr>(new_call);
}

private:
/*!
* \brief Check whether add can be converted to depthwise, or whether
* it should be offloaded as a normal add operation.
*/
bool CheckCanConvertAdd(const Expr& expr) {
Call call = Downcast<Call>(expr);
return call->args[0]->IsInstance<ConstantNode>() || call->args[1]->IsInstance<ConstantNode>();
}
};

tvm::transform::Pass ConvertEquivalents() {
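To see how the identity-kernel scale selection in ConvertQnnAdd plays out numerically, here is a worked example in Python; the two scale values are assumptions for illustration, not values from the commit:

# Assumed quantization parameters, for illustration only.
input_scale = 0.5
output_scale = 0.25

ub = min(output_scale / input_scale, 1.0)  # upper bound: 0.5
lb = 1.0 / 255.0                           # lower bound: ~0.0039
target = (ub + lb) / 2.0                   # midpoint: ~0.2520
recip_rounded = round(1.0 / target)        # 4
kernel_scale = 1.0 / recip_rounded         # 0.25

# Every kernel element is quantized to recip_rounded, so the dequantized
# weight is recip_rounded * kernel_scale == 1.0 exactly: the convolution
# is an identity in the quantized domain, leaving the add to the bias.
assert recip_rounded * kernel_scale == 1.0
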
3 changes: 2 additions & 1 deletion tests/python/contrib/test_ethosn/infrastructure.py
@@ -83,7 +83,8 @@ def make_module(func, params):

def make_ethosn_composite(ethosn_expr, name):
vars = relay.analysis.free_vars(ethosn_expr)
func = relay.Function([relay.Var("a")], ethosn_expr)
inner_vars = [relay.Var(v.name_hint, v.type_annotation) for v in vars]
func = relay.Function(inner_vars, ethosn_expr)
func = func.with_attr("Composite", name)
call = relay.Call(func, vars)
return call