diff --git a/apps/benchmark/adreno/adreno_gpu_bench_texture.py b/apps/benchmark/adreno/adreno_gpu_bench_texture.py
new file mode 100755
index 000000000000..2228cda31a39
--- /dev/null
+++ b/apps/benchmark/adreno/adreno_gpu_bench_texture.py
@@ -0,0 +1,278 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Benchmark script for various models on Adreno GPU.
+"""
+import argparse
+
+import numpy as np
+
+import os
+import sys
+import tvm
+from tvm import te
+from tvm.relay import testing
+from tvm.contrib.utils import tempdir
+import tvm.contrib.graph_executor as runtime
+from tvm import relay
+from tvm import autotvm
+from tvm.contrib import utils, ndk
+
+
+def get_network(name, batch_size, dtype="float32"):
+    """Get the symbol definition and random weight of a network
+
+    Parameters
+    ----------
+    name: str
+        The name of the network, can be 'resnet-18', 'resnet-50', 'vgg-16', 'inception_v3', 'mobilenet', ...
+    batch_size: int
+        batch size
+    dtype: str
+        Data type
+
+    Returns
+    -------
+    net: tvm.IRModule
+        The relay function of network definition
+    params: dict
+        The random parameters for benchmark
+    input_shape: tuple
+        The shape of input tensor
+    output_shape: tuple
+        The shape of output tensor
+    """
+    input_shape = (batch_size, 3, 224, 224)
+    output_shape = (batch_size, 1000)
+
+    if name == "mobilenet":
+        net, params = testing.mobilenet.get_workload(batch_size=batch_size, dtype=dtype)
+    elif name == "inception_v3":
+        input_shape = (batch_size, 3, 299, 299)
+        net, params = testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
+    elif "resnet" in name:
+        n_layer = int(name.split("-")[1])
+        net, params = testing.resnet.get_workload(
+            num_layers=n_layer, batch_size=batch_size, dtype=dtype
+        )
+    elif "vgg" in name:
+        n_layer = int(name.split("-")[1])
+        net, params = testing.vgg.get_workload(
+            num_layers=n_layer, batch_size=batch_size, dtype=dtype
+        )
+    elif "densenet" in name:
+        n_layer = int(name.split("-")[1])
+        net, params = testing.densenet.get_workload(
+            densenet_size=n_layer, batch_size=batch_size, dtype=dtype
+        )
+    elif "squeezenet" in name:
+        version = name.split("_v")[1]
+        net, params = testing.squeezenet.get_workload(
+            batch_size=batch_size, version=version, dtype=dtype
+        )
+    elif name == "mxnet":
+        # an example for mxnet model
+        from mxnet.gluon.model_zoo.vision import get_model
+
+        block = get_model("resnet18_v1", pretrained=True)
+        net, params = relay.frontend.from_mxnet(block, shape={"data": input_shape}, dtype=dtype)
+        net = net["main"]
+        net = relay.Function(
+            net.params, relay.nn.softmax(net.body), None, net.type_params, net.attrs
+        )
+        net = tvm.IRModule.from_expr(net)
+    else:
+        raise ValueError("Unsupported network: " + name)
+
+    return net, params, input_shape, output_shape
+
+
+def print_progress(msg):
+    """print progress message
+
+    Parameters
+    ----------
+    msg: str
+        The message to print
+    """
+    sys.stdout.write(msg + "\r")
+    sys.stdout.flush()
+
+
+def tune_tasks(
+    tasks,
+    measure_option,
+    n_trial=1024,
+    early_stopping=None,
+    log_filename="tuning.log",
+):
+    from tvm.autotvm.tuner import XGBTuner
+
+    tmp_log_file = log_filename + ".tmp"
+
+    for i, tsk in enumerate(reversed(tasks)):
+        print("Task: ", tsk)
+        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
+        tuner_obj = XGBTuner(tsk, loss_type="rank")
+
+        tsk_trial = min(n_trial, len(tsk.config_space))
+        tuner_obj.tune(
+            n_trial=tsk_trial,
+            early_stopping=early_stopping,
+            measure_option=measure_option,
+            callbacks=[
+                autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
+                autotvm.callback.log_to_file(tmp_log_file),
+            ],
+        )
+
+    autotvm.record.pick_best(tmp_log_file, log_filename)
+
+
+def evaluate_network(network, target, target_host, dtype, repeat):
+    print_progress(network)
+    net, params, input_shape, output_shape = get_network(network, batch_size=1, dtype=dtype)
+
+    # Auto Tuning
+    tune_log = "adreno-" + network + "-" + dtype + ".log"
+    tuning_options = {
+        "log_filename": tune_log,
+        "early_stopping": None,
+        "measure_option": autotvm.measure_option(
+            builder=autotvm.LocalBuilder(build_func=ndk.create_shared, timeout=15),
+            runner=autotvm.RPCRunner(
+                args.rpc_key,
+                host=args.host,
+                port=args.port,
+                number=3,
+                timeout=600,
+            ),
+        ),
+    }
+    if args.tune:
+        tasks = autotvm.task.extract_from_program(
+            net, target=target, target_host=target_host, params=params
+        )
+        tune_tasks(tasks, **tuning_options)
+
+    print_progress("%-20s building..." % network)
+
+    # Build, applying the tuning log if one exists
+    if os.path.exists(tune_log):
+        with autotvm.apply_history_best(tune_log):
+            with tvm.transform.PassContext(opt_level=3):
+                lib = relay.build(
+                    net, target=tvm.target.Target(target, host=target_host), params=params
+                )
+    else:
+        with tvm.transform.PassContext(opt_level=3):
+            lib = relay.build(
+                net, target=tvm.target.Target(target, host=target_host), params=params
+            )
+
+    tmp = tempdir()
+
+    filename = "%s.so" % network
+    lib.export_library(tmp.relpath(filename), ndk.create_shared)
+
+    # upload library and params
+    print_progress("%-20s uploading..." % network)
+
+    # connect to remote device
+    tracker = tvm.rpc.connect_tracker(args.host, args.port)
+    remote = tracker.request(args.rpc_key)
+
+    dev = remote.device(str(target), 0)
+    remote.upload(tmp.relpath(filename))
+
+    rlib = remote.load_module(filename)
+    module = runtime.GraphModule(rlib["default"](dev))
+    data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
+    module.set_input("data", data_tvm)
+
+    # evaluate
+    print_progress("%-20s evaluating..." % network)
+    ftimer = module.module.time_evaluator("run", dev, number=1, repeat=repeat)
+    prof_res = np.array(ftimer().results) * 1000  # multiply by 1000 to convert to milliseconds
+    print(
+        "%-20s %-19s (%s)"
+        % (network + "-" + dtype, "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res))
+    )
+    return (np.mean(prof_res), np.std(prof_res))
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--network",
+        type=str,
+        choices=[
+            "resnet-18",
+            "resnet-34",
+            "resnet-50",
+            "vgg-16",
+            "vgg-19",
+            "densenet-121",
+            "inception_v3",
+            "mobilenet",
+            "squeezenet_v1.0",
+            "squeezenet_v1.1",
+        ],
+        help="The name of the neural network",
+    )
+    parser.add_argument("--host", type=str, default="127.0.0.1")
+    parser.add_argument("--port", type=int, default=9190)
+    parser.add_argument("--rpc-key", type=str, default="android")
+    parser.add_argument("--repeat", type=int, default=30)
+    parser.add_argument("--tune", type=bool, default=False)
+    args = parser.parse_args()
+
+    if args.network is None:
+        networks = [
+            "resnet-18",
+            "resnet-34",
+            "resnet-50",
+            "vgg-16",
+            "vgg-19",
+            "densenet-121",
+            "inception_v3",
+            "mobilenet",
+            "squeezenet_v1.0",
+            "squeezenet_v1.1",
+        ]
+    else:
+        networks = [args.network]
+
+    target = "opencl -device=adreno"
+    target_host = "llvm -mtriple=arm64-linux-android"
+
+    print("--------------------------------------------------")
+    print("%-20s %-20s" % ("Network Name", "Mean Inference Time (std dev)"))
+    print("--------------------------------------------------")
+
+    results = {}
+
+    for network in networks:
+        ftime = evaluate_network(network, target, target_host, "float32", args.repeat)
+        results[network + "-float32"] = ftime
+        ftime = evaluate_network(network, target, target_host, "float16", args.repeat)
+        results[network + "-float16"] = ftime
+
+    print("----------------------------------------------------------------------")
+    print("%-30s %-30s" % ("Network Name", "Mean Inference Time (std dev)"))
+    print("----------------------------------------------------------------------")
+    for key, val in results.items():
+        print("%-30s %-30s (%s)" % (key, "%.2f ms" % val[0], "%.2f ms" % val[1]))
diff --git a/apps/benchmark/adreno/bench.sh b/apps/benchmark/adreno/bench.sh
new file mode 100755
index 000000000000..7d46685b8654
--- /dev/null
+++ b/apps/benchmark/adreno/bench.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -euxo pipefail
+
+source tests/scripts/setup-pytest-env.sh
+export PYTHONPATH=${PYTHONPATH}:${TVM_PATH}/apps/extension/python
+export LD_LIBRARY_PATH="build:${LD_LIBRARY_PATH:-}"
+
+export TVM_TRACKER_HOST=127.0.0.1
+export TVM_TRACKER_PORT=$(((RANDOM % 100) + 9100))
+export RPC_DEVICE_KEY="android"
+export TVM_NDK_CC="${ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang"
+
+env PYTHONPATH=python python3 -m tvm.exec.rpc_tracker --host "${TVM_TRACKER_HOST}" --port "${TVM_TRACKER_PORT}" &
+TRACKER_PID=$!
+sleep 5 # Wait for tracker to bind
+
+export ANDROID_SERIAL=$2
+
+adb shell "mkdir -p /data/local/tmp/tvm_ci"
+adb push build-adreno-target/tvm_rpc /data/local/tmp/tvm_ci/tvm_rpc_ci
+adb push build-adreno-target/libtvm_runtime.so /data/local/tmp/tvm_ci
+
+adb reverse tcp:${TVM_TRACKER_PORT} tcp:${TVM_TRACKER_PORT}
+adb forward tcp:5000 tcp:5000
+adb forward tcp:5001 tcp:5001
+adb forward tcp:5002 tcp:5002
+env adb shell "cd /data/local/tmp/tvm_ci; killall -9 tvm_rpc_ci; sleep 2; LD_LIBRARY_PATH=/data/local/tmp/tvm_ci/ ./tvm_rpc_ci server --host=0.0.0.0 --port=5000 --port-end=5010 --tracker=127.0.0.1:${TVM_TRACKER_PORT} --key=${RPC_DEVICE_KEY}" &
+DEVICE_PID=$!
+sleep 5 # Wait for the device connections
+trap "{ kill ${TRACKER_PID}; kill ${DEVICE_PID}; }" 0
+
+# cleanup pycache
+find . -type f -path "*.pyc" | xargs rm -f
+# Test TVM
+make cython3
+
+if [ "texture" == $1 ] ; then
+  python3 apps/benchmark/adreno/adreno_gpu_bench_texture.py --host ${TVM_TRACKER_HOST} --port ${TVM_TRACKER_PORT} --rpc-key ${RPC_DEVICE_KEY}
+fi
+
+kill ${TRACKER_PID}
+kill ${DEVICE_PID}
diff --git a/tests/scripts/ci.py b/tests/scripts/ci.py
index 16389d29354c..756b269d0e50 100755
--- a/tests/scripts/ci.py
+++ b/tests/scripts/ci.py
@@ -727,6 +727,13 @@ def add_subparser(
                     "./tests/scripts/task_python_adreno.sh " + os.environ.get("ANDROID_SERIAL", ""),
                 ],
             ),
+            "benchmarks": (
+                "run Adreno Texture Benchmarks",
+                [
+                    "./apps/benchmark/adreno/bench.sh texture "
+                    + os.environ.get("ANDROID_SERIAL", ""),
+                ],
+            ),
         },
     ),
 ]
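
Note: outside of the bench.sh CI wrapper, the benchmark script can also be driven by hand against an already-running RPC tracker. The two commands below are only a sketch, not part of the patch: they reuse the tracker invocation from bench.sh, the host/port/key values are the script's own argparse defaults, and they assume an Adreno device has already registered itself with the tracker under the "android" key.

    # Start an RPC tracker on the script's default --host/--port (assumed free on this machine).
    python3 -m tvm.exec.rpc_tracker --host 127.0.0.1 --port 9190 &

    # Benchmark one network in float32 and float16; omit --network to sweep the full model list.
    python3 apps/benchmark/adreno/adreno_gpu_bench_texture.py \
        --network mobilenet --host 127.0.0.1 --port 9190 --rpc-key android --repeat 30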