forked from awslabs/deeplearning-benchmark
-
Notifications
You must be signed in to change notification settings - Fork 1
/
benchmark_driver.py
executable file
·71 lines (54 loc) · 2.49 KB
/
benchmark_driver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from __future__ import print_function
import argparse
import os
from ast import literal_eval
import logging
try:
import ConfigParser
config = ConfigParser.ConfigParser()
except ImportError:
import configparser
config = configparser.ConfigParser()
from utils import cfg_process, metrics_manager
# Path of the template config handed to cfg_process.generate_cfg as its first
# argument. NOTE: despite the _DIR suffix this is a file path (relative to the
# current working directory), not a directory.
CONFIG_TEMPLATE_DIR = './task_config_template.cfg'
# Path of the per-run config file produced by cfg_process.generate_cfg; it is
# read and then deleted by run_benchmark. Also a file path, not a directory.
CONFIG_DIR = './task_config.cfg'
def run_benchmark(args):
    """Generate the task config from the template, run the task, then clean up.

    The generated config file (``CONFIG_DIR``) is a temporary artifact of this
    call: it is removed when the function exits, whether the benchmark
    succeeded or raised.

    Args:
        args: Parsed command-line namespace. All of its attributes are
            forwarded to ``cfg_process.generate_cfg`` as keyword arguments;
            ``args.metrics_suffix`` and ``args.framework`` are additionally
            passed to ``metrics_manager.benchmark``.

    Raises:
        IndexError: If the generated config file contains no task section
            (for example because it was never written).
    """
    try:
        # modify the template config file and generate the user defined config file.
        cfg_process.generate_cfg(CONFIG_TEMPLATE_DIR, CONFIG_DIR, **vars(args))
        # NOTE: ``config`` is the module-level parser; ``read`` silently skips
        # files it cannot open, hence the explicit emptiness check below.
        config.read(CONFIG_DIR)

        # the user defined config file should only have one task
        sections = config.sections()
        if not sections:
            raise IndexError(
                "no task section found in generated config file %r" % CONFIG_DIR
            )
        selected_task = sections[0]

        # These options are stored as Python literals in the config file.
        metric_patterns = literal_eval(config.get(selected_task, "patterns"))
        metric_names = literal_eval(config.get(selected_task, "metrics"))
        metric_compute_methods = literal_eval(config.get(selected_task, "compute_method"))
        command_to_execute = config.get(selected_task, "command_to_execute")
        num_gpus = int(config.get(selected_task, "num_gpus"))

        metrics_manager.benchmark(
            command_to_execute=command_to_execute,
            metric_patterns=metric_patterns,
            metric_names=metric_names,
            metric_compute_methods=metric_compute_methods,
            num_gpus=num_gpus,
            task_name=selected_task,
            suffix=args.metrics_suffix,
            framework=args.framework
        )
    finally:
        # clean up even when the benchmark fails, so a stale config never
        # lingers; guard the removal so a missing file does not mask the
        # original exception.
        if os.path.exists(CONFIG_DIR):
            os.remove(CONFIG_DIR)
if __name__ == '__main__':
    # Script entry point: parse the CLI, log to "<task-name>.log", run the task.
    # Imported here so this block does not depend on the file-level import list.
    import sys

    parser = argparse.ArgumentParser(description="Run a benchmark task.")
    parser.add_argument('--framework', type=str, help='Framework name e.g. mxnet')
    # Required: the task name is used below to build the log file name, so a
    # missing value would otherwise fail with an opaque TypeError.
    parser.add_argument('--task-name', type=str, required=True,
                        help='Task Name e.g. resnet50_cifar10_symbolic.')
    parser.add_argument('--num-gpus', type=int, help='Numbers of gpus. e.g. --num-gpus 8')
    parser.add_argument('--epochs', type=int, help='Numbers of epochs for training. e.g. --epochs 20')
    parser.add_argument('--metrics-suffix', type=str, help='Metrics suffix e.g. --metrics-suffix daily')
    parser.add_argument('--kvstore', type=str, default='device',help='kvstore to use for trainer/module.')
    parser.add_argument('--dtype', type=str, default='float32',help='floating point precision to use')
    args = parser.parse_args()

    log_file_location = args.task_name + ".log"
    logging.basicConfig(filename=log_file_location,level=logging.DEBUG)

    try:
        run_benchmark(args)
    except Exception:
        # Top-level boundary: record the traceback in the log file, then exit
        # with a non-zero status so calling scripts/CI can detect the failure.
        logging.exception("Fatal error in run_benchmark")
        sys.exit(1)