Support monitor mode when creating or resuming a new experiment (microsoft#1933)
SparkSnail · Jan 8, 2020 · bf2b929 · bf2b929
1 parent 4ed78ed
commit bf2b929
Show file tree

Hide file tree

Showing 5 changed files with 39 additions and 11 deletions.
diff --git a/docs/en_US/Tutorial/Nnictl.md b/docs/en_US/Tutorial/Nnictl.md
@@ -49,6 +49,7 @@ nnictl support commands:
   |--config, -c|  True| |YAML configure file of the experiment|
   |--port, -p|False| |the port of restful server|
   |--debug, -d|False||set debug mode|
+  |--watch, -w|False||set watch mode|
 
 * Examples
 
@@ -97,6 +98,7 @@ Debug mode will disable version check function in Trialkeeper.
   |id|  True| |The id of the experiment you want to resume|
   |--port, -p|  False| |Rest port of the experiment you want to resume|
   |--debug, -d|False||set debug mode|
+  |--watch, -w|False||set watch mode|
 
 * Example
 

diff --git a/tools/bash-completion b/tools/bash-completion
@@ -1,7 +1,7 @@
 # list of commands/arguments
 __nnictl_cmds="create resume view update stop trial experiment platform import export webui config log package tensorboard top"
-__nnictl_create_cmds="--config --port --debug"
-__nnictl_resume_cmds="--port --debug"
+__nnictl_create_cmds="--config --port --debug --watch"
+__nnictl_resume_cmds="--port --debug --watch"
 __nnictl_view_cmds="--port"
 __nnictl_update_cmds="searchspace concurrency duration trialnum"
 __nnictl_update_searchspace_cmds="--filename"

diff --git a/tools/nni_cmd/launcher.py b/tools/nni_cmd/launcher.py
@@ -20,7 +20,7 @@
                           detect_port, get_user, get_python_dir
 from .constants import NNICTL_HOME_DIR, ERROR_INFO, REST_TIME_OUT, EXPERIMENT_SUCCESS_INFO, LOG_HEADER, PACKAGE_REQUIREMENTS
 from .command_utils import check_output_command, kill_command
-from .nnictl_utils import update_experiment
+from .nnictl_utils import update_experiment, set_monitor
 
 def get_log_path(config_file_name):
     '''generate stdout and stderr log path'''
@@ -493,6 +493,8 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
                                             experiment_config['experimentName'])
 
     print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, '   '.join(web_ui_url_list)))
+    if args.watch:
+        set_monitor(True, 3, args.port, rest_process.pid)
 
 def create_experiment(args):
     '''start a new experiment'''
@@ -506,8 +508,8 @@ def create_experiment(args):
     validate_all_content(experiment_config, config_path)
 
     nni_config.set_config('experimentConfig', experiment_config)
-    launch_experiment(args, experiment_config, 'new', config_file_name)
     nni_config.set_config('restServerPort', args.port)
+    launch_experiment(args, experiment_config, 'new', config_file_name)
 
 def manage_stopped_experiment(args, mode):
     '''view a stopped experiment'''

diff --git a/tools/nni_cmd/nnictl.py b/tools/nni_cmd/nnictl.py
@@ -51,13 +51,15 @@ def parse_args():
     parser_start.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
     parser_start.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
     parser_start.add_argument('--debug', '-d', action='store_true', help=' set debug mode')
+    parser_start.add_argument('--watch', '-w', action='store_true', help=' set watch mode')
     parser_start.set_defaults(func=create_experiment)
 
     # parse resume command
     parser_resume = subparsers.add_parser('resume', help='resume a new experiment')
     parser_resume.add_argument('id', nargs='?', help='The id of the experiment you want to resume')
     parser_resume.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
     parser_resume.add_argument('--debug', '-d', action='store_true', help=' set debug mode')
+    parser_resume.add_argument('--watch', '-w', action='store_true', help=' set watch mode')
     parser_resume.set_defaults(func=resume_experiment)
 
     # parse view command

diff --git a/tools/nni_cmd/nnictl_utils.py b/tools/nni_cmd/nnictl_utils.py
@@ -3,6 +3,7 @@
 
 import csv
 import os
+import sys
 import json
 import time
 import re
@@ -623,23 +624,44 @@ def show_experiment_info():
                           content[index].get('endTime'), content[index].get('status')))
         print(TRIAL_MONITOR_TAIL)
 
-def monitor_experiment(args):
-    '''monitor the experiment'''
-    if args.time <= 0:
-        print_error('please input a positive integer as time interval, the unit is second.')
-        exit(1)
+def set_monitor(auto_exit, time_interval, port=None, pid=None):
+    '''set the experiment monitor engine'''
     while True:
         try:
-            os.system('clear')
+            if sys.platform == 'win32':
+                os.system('cls')
+            else:
+                os.system('clear')
             update_experiment()
             show_experiment_info()
-            time.sleep(args.time)
+            if auto_exit:
+                status = get_experiment_status(port)
+                if status in ['DONE', 'ERROR', 'STOPPED']:
+                    print_normal('Experiment status is {0}.'.format(status))
+                    print_normal('Stopping experiment...')
+                    kill_command(pid)
+                    print_normal('Stop experiment success.')
+                    exit(0)
+            time.sleep(time_interval)
         except KeyboardInterrupt:
+            if auto_exit:
+                print_normal('Stopping experiment...')
+                kill_command(pid)
+                print_normal('Stop experiment success.')
+            else:
+                print_normal('Exiting...')
             exit(0)
         except Exception as exception:
             print_error(exception)
             exit(1)
 
+def monitor_experiment(args):
+    '''monitor the experiment'''
+    if args.time <= 0:
+        print_error('please input a positive integer as time interval, the unit is second.')
+        exit(1)
+    set_monitor(False, args.time)
+
 def export_trials_data(args):
     '''export experiment metadata to csv
     '''