Skip to content

Commit

Permalink
Support monitor mode when creating or resuming a new experiment (micr…
Browse files Browse the repository at this point in the history
  • Loading branch information
SparkSnail authored Jan 8, 2020
1 parent 4ed78ed commit bf2b929
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 11 deletions.
2 changes: 2 additions & 0 deletions docs/en_US/Tutorial/Nnictl.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ nnictl support commands:
|--config, -c| True| |YAML configuration file of the experiment|
|--port, -p|False| |the port of restful server|
|--debug, -d|False||set debug mode|
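|--watch, -w|False||set watch mode: keep monitoring the experiment in the terminal, and stop the experiment when it finishes or when Ctrl+C is pressed|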

* Examples

Expand Down Expand Up @@ -97,6 +98,7 @@ Debug mode will disable version check function in Trialkeeper.
|id| True| |The id of the experiment you want to resume|
|--port, -p| False| |Rest port of the experiment you want to resume|
|--debug, -d|False||set debug mode|
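|--watch, -w|False||set watch mode: keep monitoring the experiment in the terminal, and stop the experiment when it finishes or when Ctrl+C is pressed|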

* Example

Expand Down
4 changes: 2 additions & 2 deletions tools/bash-completion
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# list of commands/arguments
__nnictl_cmds="create resume view update stop trial experiment platform import export webui config log package tensorboard top"
__nnictl_create_cmds="--config --port --debug"
__nnictl_resume_cmds="--port --debug"
__nnictl_create_cmds="--config --port --debug --watch"
__nnictl_resume_cmds="--port --debug --watch"
__nnictl_view_cmds="--port"
__nnictl_update_cmds="searchspace concurrency duration trialnum"
__nnictl_update_searchspace_cmds="--filename"
Expand Down
6 changes: 4 additions & 2 deletions tools/nni_cmd/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
detect_port, get_user, get_python_dir
from .constants import NNICTL_HOME_DIR, ERROR_INFO, REST_TIME_OUT, EXPERIMENT_SUCCESS_INFO, LOG_HEADER, PACKAGE_REQUIREMENTS
from .command_utils import check_output_command, kill_command
from .nnictl_utils import update_experiment
from .nnictl_utils import update_experiment, set_monitor

def get_log_path(config_file_name):
'''generate stdout and stderr log path'''
Expand Down Expand Up @@ -493,6 +493,8 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
experiment_config['experimentName'])

print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, ' '.join(web_ui_url_list)))
if args.watch:
set_monitor(True, 3, args.port, rest_process.pid)

def create_experiment(args):
'''start a new experiment'''
Expand All @@ -506,8 +508,8 @@ def create_experiment(args):
validate_all_content(experiment_config, config_path)

nni_config.set_config('experimentConfig', experiment_config)
launch_experiment(args, experiment_config, 'new', config_file_name)
nni_config.set_config('restServerPort', args.port)
launch_experiment(args, experiment_config, 'new', config_file_name)

def manage_stopped_experiment(args, mode):
'''view a stopped experiment'''
Expand Down
2 changes: 2 additions & 0 deletions tools/nni_cmd/nnictl.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,15 @@ def parse_args():
parser_start.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
parser_start.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_start.add_argument('--debug', '-d', action='store_true', help=' set debug mode')
parser_start.add_argument('--watch', '-w', action='store_true', help=' set watch mode')
parser_start.set_defaults(func=create_experiment)

# parse resume command
parser_resume = subparsers.add_parser('resume', help='resume a new experiment')
parser_resume.add_argument('id', nargs='?', help='The id of the experiment you want to resume')
parser_resume.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_resume.add_argument('--debug', '-d', action='store_true', help=' set debug mode')
parser_resume.add_argument('--watch', '-w', action='store_true', help=' set watch mode')
parser_resume.set_defaults(func=resume_experiment)

# parse view command
Expand Down
36 changes: 29 additions & 7 deletions tools/nni_cmd/nnictl_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import csv
import os
import sys
import json
import time
import re
Expand Down Expand Up @@ -623,23 +624,44 @@ def show_experiment_info():
content[index].get('endTime'), content[index].get('status')))
print(TRIAL_MONITOR_TAIL)

# NOTE(review): this copy of monitor_experiment only validates args.time and
# then falls through; a later definition of monitor_experiment in this listing
# performs the same check and then calls set_monitor. Presumably this copy is
# the pre-change header left behind by the diff view -- confirm before relying
# on it.
def monitor_experiment(args):
'''monitor the experiment'''
if args.time <= 0:
print_error('please input a positive integer as time interval, the unit is second.')
exit(1)
def set_monitor(auto_exit, time_interval, port=None, pid=None):
    '''set the experiment monitor engine

    Loops forever: clears the terminal, calls update_experiment() and
    show_experiment_info(), then sleeps `time_interval` seconds.

    auto_exit: when true, the experiment status is polled every round; once it
        is DONE, ERROR or STOPPED, or when the user presses Ctrl+C, the process
        `pid` is killed through kill_command before exiting.
    time_interval: number of seconds to sleep between two refreshes.
    port: forwarded to get_experiment_status; only used when auto_exit is true.
    pid: forwarded to kill_command; only used when auto_exit is true.

    This function does not return. It exits the interpreter with status 0 on
    a finished experiment or Ctrl+C, and with status 1 on any other exception.
    '''
    while True:
        try:
            # Clear the screen exactly once, with the command that exists on
            # the current platform.
            if sys.platform == 'win32':
                os.system('cls')
            else:
                os.system('clear')
            update_experiment()
            show_experiment_info()
            if auto_exit:
                status = get_experiment_status(port)
                if status in ['DONE', 'ERROR', 'STOPPED']:
                    print_normal('Experiment status is {0}.'.format(status))
                    print_normal('Stopping experiment...')
                    kill_command(pid)
                    print_normal('Stop experiment success.')
                    exit(0)
            # Sleep on the interval passed in by the caller; this function has
            # no `args` object of its own.
            time.sleep(time_interval)
        except KeyboardInterrupt:
            if auto_exit:
                print_normal('Stopping experiment...')
                kill_command(pid)
                print_normal('Stop experiment success.')
            else:
                print_normal('Exiting...')
            exit(0)
        except Exception as exception:
            # Any unexpected failure ends the monitor with a non-zero status.
            print_error(exception)
            exit(1)

def monitor_experiment(args):
    '''monitor the experiment

    Rejects a non-positive args.time with an error message and exit status 1;
    otherwise starts set_monitor with auto-exit disabled, refreshing every
    args.time seconds.
    '''
    interval = args.time
    if interval <= 0:
        print_error('please input a positive integer as time interval, the unit is second.')
        exit(1)
    set_monitor(False, interval)

def export_trials_data(args):
'''export experiment metadata to csv
'''
Expand Down

0 comments on commit bf2b929

Please sign in to comment.