[ETHOSN] Support conversion of add to depthwise #12531

Merged 1 commit on Aug 25, 2022
43 changes: 34 additions & 9 deletions python/tvm/relay/op/contrib/ethosn.py
@@ -215,6 +215,24 @@ def qnn_mul_pattern():
input_is_right = gen_mul_inputs(is_constant(), wildcard())
return input_is_left | input_is_right

def qnn_add_pattern():
add_op = is_op("qnn.add")
gen_add_inputs = lambda x, y: add_op(
x,
y,
is_constant(),
is_constant(),
is_constant(),
is_constant(),
is_constant(),
is_constant(),
)
two_inputs = gen_add_inputs(wildcard(), wildcard())
input_is_left = gen_add_inputs(wildcard(), is_constant())
input_is_right = gen_add_inputs(is_constant(), wildcard())

return input_is_left | input_is_right | two_inputs

def check_conv2d(extract):
"""Check if a conv2d is supported by Ethos-N."""
if not ethosn_available():
@@ -289,8 +307,24 @@ def check_resize(extract):

return _ethosn.resize(extract)

def check_add(extract):
"""Check if an addition is supported by Ethos-N."""
if not ethosn_available():
return False
# Do not support scalar constants for now
check_scalar = lambda i: isinstance(i, tvm.relay.Constant) and len(i.data.shape) == 0
if check_scalar(extract.args[0]) or check_scalar(extract.args[1]):
return False

inputs = extract.args[0:2]
if any([isinstance(i, tvm.relay.Constant) for i in inputs]):
extract = _ethosn.ConvertQnnAdd(extract)
return _ethosn.conv2d(extract)
return _ethosn.addition(extract)

return [
("ethos-n.qnn_mul", qnn_mul_pattern(), check_mul),
("ethos-n.qnn_add", qnn_add_pattern(), check_add),
("ethos-n.qnn_conv2d", qnn_conv_pattern(), check_conv2d),
("ethos-n.qnn_avg_pool2d", qnn_avg_pool2d_pattern(), check_avg_pool2d),
("ethos-n.qnn_sigmoid", qnn_sigmoid_pattern(), check_sigmoid),
@@ -332,15 +366,6 @@ def reshape(expr):
return _ethosn.reshape(expr)


@tvm.ir.register_op_attr("qnn.add", "target.ethos-n")
def qnn_add(expr):
"""Check if an addition is supported by Ethos-N."""
if not ethosn_available():
return False

return _ethosn.addition(expr)


@tvm.ir.register_op_attr("qnn.concatenate", "target.ethos-n")
def qnn_concatenate(expr):
"""Check if a concatenate is supported by Ethos-N."""
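For context, this pattern-table entry plugs into TVM's standard BYOC partitioning flow: MergeComposite wraps each matched qnn.add in a composite function, and check_add runs as its predicate, rerouting constant-input additions through ConvertQnnAdd so they are validated as depthwise convolutions. A minimal sketch of that flow, using only upstream TVM passes (the helper name is hypothetical; TVM's own partition_for_ethosn performs the full equivalent):

import tvm
from tvm import relay
from tvm.relay.op.contrib import get_pattern_table

def partition_for_ethosn_add(mod):
    """Hypothetical helper: partition a module using the Ethos-N pattern table."""
    pattern_table = get_pattern_table("ethos-n")
    seq = tvm.transform.Sequential(
        [
            relay.transform.InferType(),
            relay.transform.MergeComposite(pattern_table),
            relay.transform.AnnotateTarget("ethos-n"),
            relay.transform.MergeCompilerRegions(),
            relay.transform.PartitionGraph(),
        ]
    )
    return seq(mod)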
8 changes: 4 additions & 4 deletions src/relay/backend/contrib/ethosn/codegen.cc
@@ -104,9 +104,9 @@ void InferTensorsVisitor::InferCall(const CallNode* cn) {
params.input_info = GetTensorInfo(tensor_table_, call);
err += EthosnAPI::Reshape(call, &params);
tensor_table_[cn->args[0]] = {params.input_info};
} else if (IsEthosnOp(call, "qnn.add")) {
} else if (IsEthosnFunc(call, "ethos-n.qnn_add")) {
AdditionParams params;
err += EthosnAPI::Addition(call, &params);
err += EthosnAPI::Addition(cn->op.as<FunctionNode>()->body, &params);
tensor_table_[cn->args[0]] = {params.lhs_info};
tensor_table_[cn->args[1]] = {params.rhs_info};
} else if (IsEthosnFunc(call, "ethos-n.qnn_sigmoid")) {
@@ -296,7 +296,7 @@ sl::TensorsAndId ConstructNetworkVisitor::HandleCall(const CallNode* cn) {
} else if (IsEthosnOp(call, "reshape")) {
if ((err = MakeReshapeLayer(call, &tensor))) ReportFatalError(call, err);
return MakeOps(tensor);
} else if (IsEthosnOp(call, "qnn.add")) {
} else if (IsEthosnFunc(call, "ethos-n.qnn_add")) {
if ((err = MakeAdditionLayer(call, &tensor))) ReportFatalError(call, err);
return MakeOps(tensor);
} else if (IsEthosnFunc(call, "ethos-n.qnn_sigmoid")) {
@@ -468,7 +468,7 @@ EthosnError ConstructNetworkVisitor::MakeAdditionLayer(const Call& call,
EthosnError ConstructNetworkVisitor::MakeAdditionLayer(const Call& call,
sl::TensorAndId<sl::Operand>* out) {
AdditionParams params;
if (auto err = EthosnAPI::Addition(call, &params)) {
if (auto err = EthosnAPI::Addition(call->op.as<FunctionNode>()->body, &params)) {
return err;
}

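The codegen changes above mirror the Python side: qnn.add no longer reaches the backend as a bare operator but as the body of a composite function, so the visitors now match on IsEthosnFunc(call, "ethos-n.qnn_add") and hand EthosnAPI::Addition the function body. A sketch of the IR shape this assumes (shapes and quantization parameters are illustrative, not from the PR):

import numpy as np
from tvm import relay

# Inner body: qnn.add with one constant input and explicit qparams.
a = relay.var("a", shape=(1, 4, 4, 8), dtype="uint8")
const_input = relay.const(np.full((1, 4, 4, 8), 3, dtype="uint8"))
qparams = [
    relay.const(0.5, "float32"), relay.const(0, "int32"),   # lhs scale/zero point
    relay.const(0.5, "float32"), relay.const(0, "int32"),   # rhs scale/zero point
    relay.const(0.25, "float32"), relay.const(0, "int32"),  # output scale/zero point
]
body = relay.qnn.op.add(a, const_input, *qparams)

# Tag the function as a composite, then call it: the backend matches on the
# attribute and unwraps the function body to reach the qnn.add inside.
composite = relay.Function([a], body).with_attr("Composite", "ethos-n.qnn_add")
x = relay.var("x", shape=(1, 4, 4, 8), dtype="uint8")
call = relay.Call(composite, [x])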
109 changes: 108 additions & 1 deletion src/relay/backend/contrib/ethosn/convert_equivalent.cc
@@ -38,6 +38,20 @@ namespace relay {
namespace contrib {
namespace ethosn {

/*!
* \brief Apply constant folding on an expression.
*
* \param expr The expression to fold.
* \param fold_qnn Whether to fold constants for QNN operations.
* \returns The new folded expression.
*/
Expr FoldConstantExpr(const Expr& expr, bool fold_qnn = true) {
auto mod = IRModule::FromExpr(expr);
mod = transform::FoldConstant(fold_qnn)(mod);
auto entry_func = Downcast<Function>(mod->Lookup("main"));
return expr.as<FunctionNode>() == nullptr ? entry_func->body : entry_func;
}

/*!
* \brief Converts qnn.mul to mathematically equivalent
* qnn.conv2d depthwise operation.
@@ -65,7 +79,9 @@ Expr ConvertQnnMultiply(const Expr& expr) {

const auto* input_constant = input2.as<ConstantNode>();
ICHECK(input_constant) << "Expected ConstantNode but got " << input2->GetTypeKey();
const auto* input_constant_tt = input_constant->checked_type().as<TensorTypeNode>();
Type input_constant_type = input_constant->checked_type();
const auto* input_constant_tt = input_constant_type.as<TensorTypeNode>();
ICHECK(input_constant) << "Expected TensorTypeNode but got " << input_constant_type->GetTypeKey();
int channels = input_constant_tt->shape.back().as<IntImmNode>()->value;

runtime::NDArray input_data = input_constant->data;
@@ -93,6 +109,83 @@ Expr ConvertQnnMultiply(const Expr& expr) {
TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnMultiply")
.set_body_typed(ConvertQnnMultiply);

/*!
* \brief Converts qnn.add to a mathematically equivalent
* qnn.conv2d depthwise operation.
*/
Expr ConvertQnnAdd(const Expr& expr) {
Call call = Downcast<Call>(expr);

Expr input1 = call->args[0];
Expr input2 = call->args[1];
Expr input1_scale = call->args[2];
Expr input1_zero_point = call->args[3];
Expr input2_scale = call->args[4];
Expr input2_zero_point = call->args[5];
// Swap the inputs if the constant is the first input
if (call->args[0]->IsInstance<ConstantNode>()) {
input1 = call->args[1];
input2 = call->args[0];
input1_scale = call->args[4];
input1_zero_point = call->args[5];
input2_scale = call->args[2];
input2_zero_point = call->args[3];
}
Expr output_scale = call->args[6];
Expr output_zero_point = call->args[7];

const auto* input_constant = input2.as<ConstantNode>();
ICHECK(input_constant) << "Expected ConstantNode but got " << input2->GetTypeKey();
Type input_constant_type = input_constant->checked_type();
const auto* input_constant_tt = input_constant_type.as<TensorTypeNode>();
ICHECK(input_constant) << "Expected TensorTypeNode but got " << input_constant_type->GetTypeKey();
int channels = input_constant_tt->shape.back().as<IntImmNode>()->value;

// Create the identity kernel. The kernel data is constructed such that it produces an identity
// operation in the quantized space. Therefore, the input is not scaled in any way, which allows
// us to later use the bias to perform the addition.
float input_scale_value = GetScalarFromConstant<float>(input1_scale);
float output_scale_value = GetScalarFromConstant<float>(output_scale);
float identity_kernel_scale_ub = std::min(output_scale_value / input_scale_value, 1.f);
float identity_kernel_scale_lb = (1.f / 255.f);
float identity_kernel_scale_target = (identity_kernel_scale_ub + identity_kernel_scale_lb) / 2.f;
float identity_kernel_scale_recip_rounded = std::round(1.f / identity_kernel_scale_target);
float identity_kernel_scale_value = 1.f / identity_kernel_scale_recip_rounded;
Constant identity_kernel_scale =
MakeConstantScalar(DataType::Float(32), identity_kernel_scale_value);
Constant identity_kernel_zero_point = MakeConstantScalar(DataType::Int(32), 0);
float identity_kernel_quantized_data = identity_kernel_scale_recip_rounded;
std::vector<uint8_t> identity_kernel_data(channels,
static_cast<uint8_t>(identity_kernel_quantized_data));
Constant identity_kernel =
MakeConstantTensor(input_constant_tt->dtype, {1, 1, channels, 1}, identity_kernel_data);

// Calculate the bias; this is where the addition happens. The bias values are calculated by
// scaling the constant input to input_scale * identity_kernel_scale.
Constant bias_scale =
MakeConstantScalar(DataType::Float(32), input_scale_value * identity_kernel_scale_value);
Constant bias_zero_point = MakeConstantScalar(DataType::Int(32), 0);
Expr requantize_bias =
qnn::MakeRequantize(input2, input2_scale, input2_zero_point, bias_scale, bias_zero_point, -1,
"None", "None", DataType::Int(32));
Expr reshape_bias = MakeReshape(requantize_bias, {channels});
Constant bias = Downcast<Constant>(FoldConstantExpr(reshape_bias));

// Make depthwise conv2d operation
Expr conv2d =
qnn::MakeQnnConv2D(input1, identity_kernel, input1_zero_point, identity_kernel_zero_point,
input1_scale, identity_kernel_scale, {1, 1}, {0, 0, 0, 0}, {1, 1},
channels, channels, {1, 1}, "NHWC", "HWOI", "NHWC", DataType::Int(32));
Expr bias_add = MakeBiasAdd(conv2d, bias, 3);
Expr requantize =
qnn::MakeRequantize(bias_add, input1_scale, input1_zero_point, output_scale,
output_zero_point, -1, "None", "None", input_constant_tt->dtype);

return InferType(requantize);
}

TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnAdd").set_body_typed(ConvertQnnAdd);

class ConvertEquivalentsMutator : public MixedModeMutator {
public:
Expr Rewrite_(const CallNode* pre, const Expr& post) override {
@@ -108,11 +201,25 @@ class ConvertEquivalentsMutator : public MixedModeMutator {
Expr new_func_body = ConvertQnnMultiply(func->body);
new_func = WithFields(func, func->params, new_func_body);
new_func = WithAttr(std::move(new_func), attr::kComposite, String("ethos-n.qnn_conv2d"));
} else if (composite_name == "ethos-n.qnn_add" && CheckCanConvertAdd(func->body)) {
Expr new_func_body = ConvertQnnAdd(func->body);
new_func = WithFields(func, func->params, new_func_body);
new_func = WithAttr(std::move(new_func), attr::kComposite, String("ethos-n.qnn_conv2d"));
}

Call new_call = WithFields(call, new_func);
return Downcast<Expr>(new_call);
}

private:
/*!
* \brief Check whether the add can be converted to a depthwise convolution,
* or whether it should be offloaded as a normal addition operation.
*/
bool CheckCanConvertAdd(const Expr& expr) {
Call call = Downcast<Call>(expr);
return call->args[0]->IsInstance<ConstantNode>() || call->args[1]->IsInstance<ConstantNode>();
}
};

tvm::transform::Pass ConvertEquivalents() {
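The identity-kernel construction in ConvertQnnAdd is worth a worked check: the kernel scale is chosen so that its reciprocal is an integer, and the kernel data is exactly that reciprocal, so every weight equals exactly 1.0 in real terms and the whole addition is carried by the bias. A quick numeric sketch of the derivation (values are illustrative, not from the PR):

import numpy as np

input_scale, output_scale = 0.5, 0.25

# Mirror the scale selection above: pick a kernel scale whose reciprocal
# rounds to an integer, so kernel_value * kernel_scale == 1 exactly.
ub = min(output_scale / input_scale, 1.0)  # 0.5
lb = 1.0 / 255.0                           # lower bound on the scale
target = (ub + lb) / 2.0                   # ~0.25196
recip = np.round(1.0 / target)             # 4.0
kernel_scale = 1.0 / recip                 # 0.25
kernel_value = recip                       # quantized kernel data = 4

assert kernel_value * kernel_scale == 1.0  # exact identity in real terms

# The constant input is requantized to input_scale * kernel_scale and becomes
# the bias, so the depthwise conv passes the input through and the bias adds.
bias_scale = input_scale * kernel_scale    # 0.125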
3 changes: 2 additions & 1 deletion tests/python/contrib/test_ethosn/infrastructure.py
@@ -83,7 +83,8 @@ def make_module(func, params):

def make_ethosn_composite(ethosn_expr, name):
vars = relay.analysis.free_vars(ethosn_expr)
func = relay.Function([relay.Var("a")], ethosn_expr)
inner_vars = [relay.Var(v.name_hint, v.type_annotation) for v in vars]
func = relay.Function(inner_vars, ethosn_expr)
func = func.with_attr("Composite", name)
call = relay.Call(func, vars)
return call
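
This fix addresses a latent arity bug: the old helper bound every composite body to a single untyped parameter "a", which breaks multi-input composites such as the new add pattern. A small sketch of the corrected behaviour (a plain add stands in for the composite body for brevity):

from tvm import relay

x = relay.var("x", shape=(1, 4), dtype="uint8")
y = relay.var("y", shape=(1, 4), dtype="uint8")
expr = relay.add(x, y)
free = relay.analysis.free_vars(expr)  # [x, y]

# Before the fix the helper built relay.Function([relay.Var("a")], expr):
# one made-up parameter regardless of arity, so relay.Call(func, free)
# passed two arguments to a one-parameter function.
inner = [relay.Var(v.name_hint, v.type_annotation) for v in free]
func = relay.Function(inner, expr).with_attr("Composite", "ethos-n.qnn_add")
call = relay.Call(func, free)  # arity and types now line up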