[microTVM] Add test for MLPerfTiny models (apache#13978)
* Add tests for MLPerfTiny models for the host_driven and EEMBC runner versions

* Refactor MLPerfTiny model downloads to use different links

* Fix link

* Address review comments and fix error
mehrdadh authored Feb 22, 2023
1 parent b732e3c commit a3b51f1
Showing 5 changed files with 314 additions and 15 deletions.
@@ -19,6 +19,7 @@

#include "api/submitter_implemented.h"

#include <assert.h>
#include <string.h>
#include <tvm/runtime/crt/logging.h>
#include <tvm/runtime/crt/platform.h>
@@ -161,11 +162,7 @@ void Infer(void* input_ptr) {

   struct tvmgen_default_outputs outputs = {
 #if TARGET_MODEL == MODEL_KWS
-#if COMPILE_WITH_CMSISNN
     .Identity = output_data,
-#else
-    .output = output_data,
-#endif
 #elif TARGET_MODEL == MODEL_IC
     .Identity_int8 = output_data,
 #elif TARGET_MODEL == MODEL_VWW
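Context for the change above: the field names of the generated tvmgen_default_outputs struct come from the model's output tensor names, and with the MLCommons reference KWS model adopted in this commit the output appears to be named Identity for both the CMSIS-NN and plain builds, so the conditional is no longer needed. A quick way to inspect a model's output tensor name (a minimal sketch, assuming TensorFlow is installed and the model file has been downloaded):

import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="kws_ref_model.tflite")
interpreter.allocate_tensors()
print(interpreter.get_output_details()[0]["name"])  # e.g. "Identity"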
4 changes: 2 additions & 2 deletions gallery/how_to/work_with_microtvm/micro_aot.py
@@ -74,8 +74,8 @@
 # **Note:** By default, this tutorial runs on x86 CPU using CRT. If you would like to run on a Zephyr platform,
 # you need to export the `TVM_MICRO_USE_HW` environment variable.
 #
-MODEL_URL = "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/model/keyword_spotting_quant.tflite"
-MODEL_PATH = download_testdata(MODEL_URL, "keyword_spotting_quant.tflite", module="model")
+MODEL_URL = "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/keyword_spotting/trained_models/kws_ref_model.tflite"
+MODEL_PATH = download_testdata(MODEL_URL, "kws_ref_model.tflite", module="model")
 SAMPLE_URL = "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/keyword_spotting_int8_6.pyc.npy"
 SAMPLE_PATH = download_testdata(SAMPLE_URL, "keyword_spotting_int8_6.pyc.npy", module="data")
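For context, the tutorial reads this switch from the environment further down; a minimal sketch of the pattern (the variable name `use_physical_hw` is illustrative):

import os

use_physical_hw = bool(os.getenv("TVM_MICRO_USE_HW"))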

301 changes: 301 additions & 0 deletions tests/micro/common/test_mlperftiny.py
@@ -0,0 +1,301 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import os
import pathlib
import tarfile
import tempfile
from urllib.parse import urlparse

import numpy as np
import pytest
import tensorflow as tf

import tvm
import tvm.testing
from tvm import relay
from tvm.relay.backend import Executor, Runtime
from tvm.micro.project_api import server
from tvm.contrib.download import download_testdata
from tvm.micro import export_model_library_format
from tvm.micro.model_library_format import generate_c_interface_header
from tvm.micro.testing import create_aot_session, predict_labels_aot
from tvm.micro.testing.utils import (
    create_header_file,
    mlf_extract_workspace_size_bytes,
)

MLPERF_TINY_MODELS = {
    "kws": {
        "name": "Keyword Spotting",
        "index": 1,
        "url": "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/keyword_spotting/trained_models/kws_ref_model.tflite",
        "sample": "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/keyword_spotting_int8_6.pyc.npy",
    },
    "vww": {
        "name": "Visual Wake Words",
        "index": 2,
        "url": "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/visual_wake_words/trained_models/vww_96_int8.tflite",
        "sample": "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/visual_wake_word_int8_1.npy",
    },
    # Note: We use a quantized model with float32 I/O here because TVM does not
    # handle int8 I/O correctly, and accuracy would drop significantly otherwise.
    "ad": {
        "name": "Anomaly Detection",
        "index": 3,
        "url": "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/anomaly_detection/trained_models/ToyCar/baseline_tf23/model/model_ToyCar_quant_fullint_micro.tflite",
        "sample": "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/anomaly_detection_normal_id_01.npy",
        # This model takes a (1, 640) vector as input, so it must be called once
        # per time slice (40 times for this sample); see predict_ad_labels_aot below.
    },
    "ic": {
        "name": "Image Classification",
        "index": 4,
        "url": "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/image_classification/trained_models/pretrainedResnet_quant.tflite",
        "sample": "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/image_classification_int8_0.npy",
    },
}


def mlperftiny_get_module(model_name: str):
    """Download an MLPerfTiny model, convert it to Relay, and collect its I/O metadata."""
    model_url = MLPERF_TINY_MODELS[model_name]["url"]
    url = urlparse(model_url)
    model_path = download_testdata(model_url, os.path.basename(url.path), module="model")

    with open(model_path, "rb") as model_file:
        tflite_model_buf = model_file.read()
    try:
        import tflite

        tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0)
    except AttributeError:
        import tflite.Model

        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0)

    interpreter = tf.lite.Interpreter(model_path=str(model_path))
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    model_info = {
        "input_name": input_details[0]["name"],
        "input_shape": tuple(input_details[0]["shape"]),
        "input_dtype": np.dtype(input_details[0]["dtype"]).name,
        "output_name": output_details[0]["name"],
        "output_shape": tuple(output_details[0]["shape"]),
        "output_dtype": np.dtype(output_details[0]["dtype"]).name,
    }

    if model_name != "ad":
        model_info["quant_output_scale"] = output_details[0]["quantization_parameters"][
            "scales"
        ][0]
        model_info["quant_output_zero_point"] = output_details[0]["quantization_parameters"][
            "zero_points"
        ][0]

    relay_mod, params = relay.frontend.from_tflite(
        tflite_model,
        shape_dict={model_info["input_name"]: model_info["input_shape"]},
        dtype_dict={model_info["input_name"]: model_info["input_dtype"]},
    )
    return relay_mod, params, model_info


def get_test_data(model_name: str) -> list:
    sample_url = MLPERF_TINY_MODELS[model_name]["sample"]
    url = urlparse(sample_url)
    sample_path = download_testdata(sample_url, os.path.basename(url.path), module="data")
    return [np.load(sample_path)]


def predict_ad_labels_aot(session, aot_executor, input_data, runs_per_sample=1):
    """A specialized version of predict_labels_aot from tvm/micro/testing/evaluation.py.
    The runtime returned for each sample is the median of the runtimes for all slices
    in that sample."""

    assert runs_per_sample > 0
    assert aot_executor.get_num_inputs() == 1
    assert aot_executor.get_num_outputs() == 1

    for sample in input_data:
        output_fp32 = np.empty_like(sample)
        slice_runtimes = []

        for i, time_slice in enumerate(sample):
            aot_executor.get_input(0).copyfrom(time_slice.reshape((1, 640)))
            result = aot_executor.module.time_evaluator(
                "run", session.device, number=runs_per_sample
            )()
            slice_runtimes.append(result.mean)
            output_fp32[i, :] = aot_executor.get_output(0).numpy()

        # The anomaly score is the mean squared reconstruction error per time slice.
        errors = np.mean(np.square(sample - output_fp32), axis=1)
        yield np.mean(errors), np.median(slice_runtimes)


@pytest.mark.parametrize("model_name", ["kws", "vww", "ad", "ic"])
@pytest.mark.parametrize("project_type", ["host_driven", "mlperftiny"])
@tvm.testing.requires_micro
@pytest.mark.skip_boards(
    ["mps2_an521", "mps3_an547", "stm32f746g_disco", "nucleo_f746zg", "nrf5340dk_nrf5340_cpuapp"]
)
def test_mlperftiny_models(platform, board, workspace_dir, serial_number, model_name, project_type):
    """Test MLPerfTiny models.

    With the host_driven project type, one input sample is used to verify
    end-to-end execution; accuracy is not checked. With the mlperftiny project
    type, each model is built in standalone mode so that it can be used with
    the EEMBC runner.
    """
    if platform != "zephyr":
        pytest.skip(reason="Other platforms are not supported yet.")

    use_cmsis_nn = False
    relay_mod, params, model_info = mlperftiny_get_module(model_name)
    target = tvm.micro.testing.get_target(platform, board)
    project_options = {"config_main_stack_size": 4000, "serial_number": serial_number}

    if use_cmsis_nn:
        project_options["cmsis_path"] = os.getenv("CMSIS_PATH")

    if model_name == "ad":
        predictor = predict_ad_labels_aot
    else:
        predictor = predict_labels_aot

    if project_type == "host_driven":
        with create_aot_session(
            platform,
            board,
            target,
            relay_mod,
            params,
            build_dir=workspace_dir,
            # The longest models take ~5 seconds to infer, but running each sample
            # ten times (runs_per_sample=10 below) makes that ~50 seconds.
            timeout_override=server.TransportTimeouts(
                session_start_retry_timeout_sec=300,
                session_start_timeout_sec=150,
                session_established_timeout_sec=150,
            ),
            project_options=project_options,
            use_cmsis_nn=use_cmsis_nn,
        ) as session:
            aot_executor = tvm.runtime.executor.aot_executor.AotModule(
                session.create_aot_executor()
            )
            args = {
                "session": session,
                "aot_executor": aot_executor,
                "input_data": get_test_data(model_name),
                "runs_per_sample": 10,
            }
            predicted_labels, runtimes = zip(*predictor(**args))

        avg_runtime = float(np.mean(runtimes) * 1000)
        print(f"Model {model_name} average runtime: {avg_runtime} ms")

    elif project_type == "mlperftiny":
        runtime = Runtime("crt")
        executor = Executor(
            "aot", {"unpacked-api": True, "interface-api": "c", "workspace-byte-alignment": 8}
        )

        config = {"tir.disable_vectorize": True}
        if use_cmsis_nn:
            from tvm.relay.op.contrib import cmsisnn

            config["relay.ext.cmsisnn.options"] = {"mcpu": target.mcpu}
            relay_mod = cmsisnn.partition_for_cmsisnn(relay_mod, params, mcpu=target.mcpu)

        with tvm.transform.PassContext(opt_level=3, config=config):
            module = tvm.relay.build(
                relay_mod, target=target, params=params, runtime=runtime, executor=executor
            )

        temp_dir = tvm.contrib.utils.tempdir()
        model_tar_path = temp_dir / "model.tar"
        export_model_library_format(module, model_tar_path)
        workspace_size = mlf_extract_workspace_size_bytes(model_tar_path)

        extra_tar_dir = tvm.contrib.utils.tempdir()
        extra_tar_file = extra_tar_dir / "extra.tar"
        # Named tar_file rather than tf to avoid shadowing the tensorflow import.
        with tarfile.open(extra_tar_file, "w:gz") as tar_file:
            with tempfile.TemporaryDirectory() as tar_temp_dir:
                model_files_path = os.path.join(tar_temp_dir, "include")
                os.mkdir(model_files_path)
                # Generate the C interface header for the model's single input/output.
                header_path = generate_c_interface_header(
                    module.libmod_name,
                    [model_info["input_name"]],
                    [model_info["output_name"]],
                    [],
                    {},
                    [],
                    0,
                    model_files_path,
                    {},
                    {},
                )
                tar_file.add(header_path, arcname=os.path.relpath(header_path, tar_temp_dir))

            create_header_file(
                "output_data",
                np.zeros(
                    shape=model_info["output_shape"],
                    dtype=model_info["output_dtype"],
                ),
                "include/tvm",
                tar_file,
            )

        input_total_size = int(np.prod(model_info["input_shape"]))

        template_project_path = pathlib.Path(tvm.micro.get_microtvm_template_projects(platform))
        project_options.update(
            {
                "extra_files_tar": str(extra_tar_file),
                "project_type": project_type,
                "board": board,
                "compile_definitions": [
                    # Memory workspace size; 512 is a temporary offset since the
                    # memory calculation is not accurate.
                    f"-DWORKSPACE_SIZE={workspace_size + 512}",
                    # Sets the model index for project compilation.
                    f'-DTARGET_MODEL={MLPERF_TINY_MODELS[model_name]["index"]}',
                    # Sets the model version required by the MLPerfTiny API.
                    f"-DTH_MODEL_VERSION=EE_MODEL_VERSION_{model_name.upper()}01",
                    # Maximum size of the input data array.
                    f"-DMAX_DB_INPUT_SIZE={input_total_size}",
                ],
            }
        )

        if model_name != "ad":
            project_options["compile_definitions"].append(
                f'-DOUT_QUANT_SCALE={model_info["quant_output_scale"]}'
            )
            project_options["compile_definitions"].append(
                f'-DOUT_QUANT_ZERO={model_info["quant_output_zero_point"]}'
            )

        project = tvm.micro.project.generate_project_from_mlf(
            template_project_path, workspace_dir / "project", model_tar_path, project_options
        )
        project.build()


if __name__ == "__main__":
    tvm.testing.main()
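To sanity-check the TFLite-to-Relay conversion from mlperftiny_get_module on the host, independent of any board, something along these lines should work (an illustrative sketch, not part of this commit; it builds for plain x86 llvm and reuses the helpers above):

import tvm
from tvm import relay
from tvm.contrib import graph_executor

relay_mod, params, model_info = mlperftiny_get_module("kws")
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(relay_mod, target="llvm", params=params)

module = graph_executor.GraphModule(lib["default"](tvm.cpu()))
# Assumes the downloaded sample already matches the model's input size.
sample = get_test_data("kws")[0]
module.set_input(
    model_info["input_name"],
    sample.reshape(model_info["input_shape"]).astype(model_info["input_dtype"]),
)
module.run()
print(module.get_output(0).numpy())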
6 changes: 3 additions & 3 deletions tests/micro/zephyr/test_zephyr_aot_exec_standalone.py
@@ -39,8 +39,8 @@ def test_tflite(workspace_dir, board, microtvm_debug, serial_number):
     output_shape = (1, 12)
     build_config = {"debug": microtvm_debug}
 
-    model_url = "https://github.com/tlc-pack/web-data/raw/25fe99fb00329a26bd37d3dca723da94316fd34c/testdata/microTVM/model/keyword_spotting_quant.tflite"
-    model_path = download_testdata(model_url, "keyword_spotting_quant.tflite", module="model")
+    model_url = "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/keyword_spotting/trained_models/kws_ref_model.tflite"
+    model_path = download_testdata(model_url, "kws_ref_model.tflite", module="model")

     # Import TFLite model
     tflite_model_buf = open(model_path, "rb").read()
@@ -68,7 +68,7 @@ def test_tflite(workspace_dir, board, microtvm_debug, serial_number):
     ):
         lowered = relay.build(relay_mod, target, params=params, runtime=runtime, executor=executor)
 
-    sample_url = "https://github.com/tlc-pack/web-data/raw/967fc387dadb272c5a7f8c3461d34c060100dbf1/testdata/microTVM/data/keyword_spotting_int8_6.pyc.npy"
+    sample_url = "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/keyword_spotting_int8_6.pyc.npy"
     sample_path = download_testdata(sample_url, "keyword_spotting_int8_6.pyc.npy", module="data")
     sample = np.load(sample_path)

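A note on the OUT_QUANT_SCALE and OUT_QUANT_ZERO compile definitions exported by test_mlperftiny.py above: they come from the TFLite output tensor's quantization parameters, which follow the standard affine scheme real_value = scale * (quantized_value - zero_point). A minimal sketch of mapping a float32 model output back to int8 under that scheme (hypothetical helper, for illustration only):

import numpy as np

def quantize_output(values_fp32, scale, zero_point):
    # Inverse of: real = scale * (q - zero_point)
    q = np.round(values_fp32 / scale) + zero_point
    return np.clip(q, -128, 127).astype(np.int8)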