From bb2ed152fe37f574e65d37d12b59a396ec660687 Mon Sep 17 00:00:00 2001 From: SparkSnail Date: Wed, 12 Aug 2020 09:31:42 +0800 Subject: [PATCH] Support save and open experiments (#2750) (cherry picked from commit d5072a29f541b5d5d208eb4e6143de6ff27cc764) --- docs/en_US/Tutorial/Nnictl.md | 59 +++++++++++- tools/nni_cmd/common_utils.py | 12 +++ tools/nni_cmd/config_utils.py | 8 +- tools/nni_cmd/constants.py | 2 + tools/nni_cmd/nnictl.py | 35 +++++-- tools/nni_cmd/nnictl_utils.py | 166 +++++++++++++++++++++++++++++++++- 6 files changed, 263 insertions(+), 19 deletions(-) diff --git a/docs/en_US/Tutorial/Nnictl.md b/docs/en_US/Tutorial/Nnictl.md index ed5c9761e1..81caf6f047 100644 --- a/docs/en_US/Tutorial/Nnictl.md +++ b/docs/en_US/Tutorial/Nnictl.md @@ -444,9 +444,6 @@ Debug mode will disable version check function in Trialkeeper. |--all| False| |delete all of experiments| - - - * __nnictl experiment export__ * Description @@ -531,6 +528,62 @@ Debug mode will disable version check function in Trialkeeper. nnictl experiment import [experiment_id] -f experiment_data.json ``` +* __nnictl experiment save__ + * Description + + Save nni experiment metadata and code data. + + * Usage + + ```bash + nnictl experiment save [OPTIONS] + ``` + + * Options + + |Name, shorthand|Required|Default|Description| + |------|------|------ |------| + |id| True| |The id of the experiment you want to save| + |--path, -p| False| |the folder path to store nni experiment data, default current working directory| + |--saveCodeDir, -s| False| |save codeDir data of the experiment, default False| + + * Examples + + > save an experiment + + ```bash + nnictl experiment save [experiment_id] --saveCodeDir + ``` + +* __nnictl experiment load__ + * Description + + Load an nni experiment. 
+ + * Usage + + ```bash + nnictl experiment load [OPTIONS] + ``` + + * Options + + |Name, shorthand|Required|Default|Description| + |------|------|------ |------| + |--path, -p| True| |the file path of nni package| + |--codeDir, -c| True| |the path of codeDir for loaded experiment; the code in the loaded experiment package will also be copied to this path| + |--logDir, -l| False| |the path of logDir for loaded experiment| + + * Examples + + > load an experiment + + ```bash + nnictl experiment load --path [path] --codeDir [codeDir] + ``` + + + ### Manage platform information diff --git a/tools/nni_cmd/common_utils.py b/tools/nni_cmd/common_utils.py index 4166bf034c..2edbf667df 100644 --- a/tools/nni_cmd/common_utils.py +++ b/tools/nni_cmd/common_utils.py @@ -4,7 +4,10 @@ import os import sys import json +import tempfile import socket +import string +import random import ruamel.yaml as yaml import psutil from colorama import Fore @@ -83,3 +86,12 @@ def check_tensorboard_version(): print_error('import tensorboard error!') exit(1) +def generate_temp_dir(): + '''generate a temp folder''' + def generate_folder_name(): + return os.path.join(tempfile.gettempdir(), 'nni', ''.join(random.sample(string.ascii_letters + string.digits, 8))) + temp_dir = generate_folder_name() + while os.path.exists(temp_dir): + temp_dir = generate_folder_name() + os.makedirs(temp_dir) + return temp_dir diff --git a/tools/nni_cmd/config_utils.py b/tools/nni_cmd/config_utils.py index 8cc1dc8ada..e6472ee3de 100644 --- a/tools/nni_cmd/config_utils.py +++ b/tools/nni_cmd/config_utils.py @@ -54,13 +54,13 @@ def __init__(self): self.experiment_file = os.path.join(NNICTL_HOME_DIR, '.experiment') self.experiments = self.read_file() - def add_experiment(self, expId, port, time, file_name, platform, experiment_name): + def add_experiment(self, expId, port, startTime, file_name, platform, experiment_name, endTime='N/A', status='INITIALIZED'): '''set {key:value} paris to self.experiment''' self.experiments[expId] = 
{} self.experiments[expId]['port'] = port - self.experiments[expId]['startTime'] = time - self.experiments[expId]['endTime'] = 'N/A' - self.experiments[expId]['status'] = 'INITIALIZED' + self.experiments[expId]['startTime'] = startTime + self.experiments[expId]['endTime'] = endTime + self.experiments[expId]['status'] = status self.experiments[expId]['fileName'] = file_name self.experiments[expId]['platform'] = platform self.experiments[expId]['experimentName'] = experiment_name diff --git a/tools/nni_cmd/constants.py b/tools/nni_cmd/constants.py index 5a37c3a1f1..0654473ed4 100644 --- a/tools/nni_cmd/constants.py +++ b/tools/nni_cmd/constants.py @@ -6,6 +6,8 @@ NNICTL_HOME_DIR = os.path.join(os.path.expanduser('~'), '.local', 'nnictl') +NNI_HOME_DIR = os.path.join(os.path.expanduser('~'), 'nni-experiments') + ERROR_INFO = 'ERROR: ' NORMAL_INFO = 'INFO: ' WARNING_INFO = 'WARNING: ' diff --git a/tools/nni_cmd/nnictl.py b/tools/nni_cmd/nnictl.py index 6a2991fe50..07afd85fab 100644 --- a/tools/nni_cmd/nnictl.py +++ b/tools/nni_cmd/nnictl.py @@ -11,7 +11,8 @@ from .nnictl_utils import stop_experiment, trial_ls, trial_kill, list_experiment, experiment_status,\ log_trial, experiment_clean, platform_clean, experiment_list, \ monitor_experiment, export_trials_data, trial_codegen, webui_url, \ - get_config, log_stdout, log_stderr, search_space_auto_gen, webui_nas + get_config, log_stdout, log_stderr, search_space_auto_gen, webui_nas, \ + save_experiment, load_experiment from .package_management import package_install, package_uninstall, package_show, package_list from .constants import DEFAULT_REST_PORT from .tensorboard_utils import start_tensorboard, stop_tensorboard @@ -129,15 +130,6 @@ def parse_args(): parser_experiment_clean.add_argument('id', nargs='?', help='the id of experiment') parser_experiment_clean.add_argument('--all', action='store_true', default=False, help='delete all of experiments') parser_experiment_clean.set_defaults(func=experiment_clean) - - #parse 
experiment command - parser_platform = subparsers.add_parser('platform', help='get platform information') - #add subparsers for parser_experiment - parser_platform_subparsers = parser_platform.add_subparsers() - parser_platform_clean = parser_platform_subparsers.add_parser('clean', help='clean up the platform data') - parser_platform_clean.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file') - parser_platform_clean.set_defaults(func=platform_clean) - #import tuning data parser_import_data = parser_experiment_subparsers.add_parser('import', help='import additional data') parser_import_data.add_argument('id', nargs='?', help='the id of experiment') @@ -149,6 +141,29 @@ def parse_args(): parser_trial_export.add_argument('--type', '-t', choices=['json', 'csv'], required=True, dest='type', help='target file type') parser_trial_export.add_argument('--filename', '-f', required=True, dest='path', help='target file path') parser_trial_export.set_defaults(func=export_trials_data) + #save an NNI experiment + parser_save_experiment = parser_experiment_subparsers.add_parser('save', help='save an experiment') + parser_save_experiment.add_argument('id', nargs='?', help='the id of experiment') + parser_save_experiment.add_argument('--path', '-p', required=False, help='the folder path to store nni experiment data, \ + default current working directory') + parser_save_experiment.add_argument('--saveCodeDir', '-s', action='store_true', default=False, help='save codeDir data \ + of the experiment') + parser_save_experiment.set_defaults(func=save_experiment) + #load an NNI experiment + parser_load_experiment = parser_experiment_subparsers.add_parser('load', help='load an experiment') + parser_load_experiment.add_argument('--path', '-p', required=True, help='the path of nni package file') + parser_load_experiment.add_argument('--codeDir', '-c', required=True, help='the path of codeDir for loaded experiment, \ + this path will also put the 
code in the loaded experiment package') + parser_load_experiment.add_argument('--logDir', '-l', required=False, help='the path of logDir for loaded experiment') + parser_load_experiment.set_defaults(func=load_experiment) + + #parse platform command + parser_platform = subparsers.add_parser('platform', help='get platform information') + #add subparsers for parser_platform + parser_platform_subparsers = parser_platform.add_subparsers() + parser_platform_clean = parser_platform_subparsers.add_parser('clean', help='clean up the platform data') + parser_platform_clean.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file') + parser_platform_clean.set_defaults(func=platform_clean) #TODO:finish webui function #parse board command diff --git a/tools/nni_cmd/nnictl_utils.py b/tools/nni_cmd/nnictl_utils.py index bbbf54fcc6..b411cfda77 100644 --- a/tools/nni_cmd/nnictl_utils.py +++ b/tools/nni_cmd/nnictl_utils.py @@ -18,9 +18,9 @@ from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response from .url_utils import trial_jobs_url, experiment_url, trial_job_id_url, export_data_url from .config_utils import Config, Experiments -from .constants import NNICTL_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT, \ +from .constants import NNICTL_HOME_DIR, NNI_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT, \ EXPERIMENT_MONITOR_INFO, TRIAL_MONITOR_HEAD, TRIAL_MONITOR_CONTENT, TRIAL_MONITOR_TAIL, REST_TIME_OUT -from .common_utils import print_normal, print_error, print_warning, detect_process, get_yml_content +from .common_utils import print_normal, print_error, print_warning, detect_process, get_yml_content, generate_temp_dir from .command_utils import check_output_command, kill_command from .ssh_utils import create_ssh_sftp_client, remove_remote_directory @@ -736,3 +736,165 @@ def search_space_auto_gen(args): print_warning('Expected search space file \'{}\' generated, but not 
found.'.format(file_path)) else: print_normal('Generate search space done: \'{}\'.'.format(file_path)) + +def save_experiment(args): + '''save experiment data to a zip file''' + experiment_config = Experiments() + experiment_dict = experiment_config.get_all_experiments() + if args.id is None: + print_error('Please set experiment id.') + exit(1) + if args.id not in experiment_dict: + print_error('Cannot find experiment {0}.'.format(args.id)) + exit(1) + if experiment_dict[args.id].get('status') != 'STOPPED': + print_error('Can only save stopped experiment!') + exit(1) + print_normal('Saving...') + nni_config = Config(experiment_dict[args.id]['fileName']) + logDir = os.path.join(NNI_HOME_DIR, args.id) + if nni_config.get_config('logDir'): + logDir = os.path.join(nni_config.get_config('logDir'), args.id) + temp_root_dir = generate_temp_dir() + + # Step1. Copy logDir to temp folder + if not os.path.exists(logDir): + print_error('logDir: %s does not exist!' % logDir) + exit(1) + temp_experiment_dir = os.path.join(temp_root_dir, 'experiment') + shutil.copytree(logDir, temp_experiment_dir) + + # Step2. Copy nnictl metadata to temp folder + temp_nnictl_dir = os.path.join(temp_root_dir, 'nnictl') + os.makedirs(temp_nnictl_dir, exist_ok=True) + try: + with open(os.path.join(temp_nnictl_dir, '.experiment'), 'w') as file: + experiment_dict[args.id]['id'] = args.id + json.dump(experiment_dict[args.id], file) + except IOError: + print_error('Write file to %s failed!' % os.path.join(temp_nnictl_dir, '.experiment')) + exit(1) + nnictl_config_dir = os.path.join(NNICTL_HOME_DIR, experiment_dict[args.id]['fileName']) + shutil.copytree(nnictl_config_dir, os.path.join(temp_nnictl_dir, experiment_dict[args.id]['fileName'])) + + # Step3. Copy code dir + if args.saveCodeDir: + temp_code_dir = os.path.join(temp_root_dir, 'code') + shutil.copytree(nni_config.get_config('experimentConfig')['trial']['codeDir'], temp_code_dir) + + # Step4. 
Archive folder + zip_package_name = 'nni_experiment_%s' % args.id + if args.path: + os.makedirs(args.path, exist_ok=True) + zip_package_name = os.path.join(args.path, zip_package_name) + shutil.make_archive(zip_package_name, 'zip', temp_root_dir) + print_normal('Save to %s.zip success!' % zip_package_name) + + # Step5. Cleanup temp data + shutil.rmtree(temp_root_dir) + +def load_experiment(args): + '''load experiment data''' + package_path = os.path.expanduser(args.path) + if not os.path.exists(args.path): + print_error('file path %s does not exist!' % args.path) + exit(1) + temp_root_dir = generate_temp_dir() + shutil.unpack_archive(package_path, temp_root_dir) + print_normal('Loading...') + # Step1. Validation + if not os.path.exists(args.codeDir): + print_error('Invalid: codeDir path does not exist!') + exit(1) + if args.logDir: + if not os.path.exists(args.logDir): + print_error('Invalid: logDir path does not exist!') + exit(1) + experiment_temp_dir = os.path.join(temp_root_dir, 'experiment') + if not os.path.exists(os.path.join(experiment_temp_dir, 'db')): + print_error('Invalid archive file: db file does not exist!') + shutil.rmtree(temp_root_dir) + exit(1) + nnictl_temp_dir = os.path.join(temp_root_dir, 'nnictl') + if not os.path.exists(os.path.join(nnictl_temp_dir, '.experiment')): + print_error('Invalid archive file: nnictl metadata file does not exist!') + shutil.rmtree(temp_root_dir) + exit(1) + try: + with open(os.path.join(nnictl_temp_dir, '.experiment'), 'r') as file: + experiment_metadata = json.load(file) + except ValueError as err: + print_error('Invalid nnictl metadata file: %s' % err) + shutil.rmtree(temp_root_dir) + exit(1) + experiment_config = Experiments() + experiment_dict = experiment_config.get_all_experiments() + experiment_id = experiment_metadata.get('id') + if experiment_id in experiment_dict: + print_error('Invalid: experiment id already exist!') + shutil.rmtree(temp_root_dir) + exit(1) + if not 
os.path.exists(os.path.join(nnictl_temp_dir, experiment_metadata.get('fileName'))): + print_error('Invalid: experiment metadata does not exist!') + shutil.rmtree(temp_root_dir) + exit(1) + + # Step2. Copy nnictl metadata + src_path = os.path.join(nnictl_temp_dir, experiment_metadata.get('fileName')) + dest_path = os.path.join(NNICTL_HOME_DIR, experiment_metadata.get('fileName')) + if os.path.exists(dest_path): + shutil.rmtree(dest_path) + shutil.copytree(src_path, dest_path) + + # Step3. Copy experiment data + nni_config = Config(experiment_metadata.get('fileName')) + nnictl_exp_config = nni_config.get_config('experimentConfig') + if args.logDir: + logDir = args.logDir + nnictl_exp_config['logDir'] = logDir + else: + if nnictl_exp_config.get('logDir'): + logDir = nnictl_exp_config['logDir'] + else: + logDir = NNI_HOME_DIR + os.rename(os.path.join(temp_root_dir, 'experiment'), os.path.join(temp_root_dir, experiment_id)) + src_path = os.path.join(os.path.join(temp_root_dir, experiment_id)) + dest_path = os.path.join(os.path.join(logDir, experiment_id)) + if os.path.exists(dest_path): + shutil.rmtree(dest_path) + shutil.copytree(src_path, dest_path) + + # Step4. Copy code dir + codeDir = os.path.expanduser(args.codeDir) + if not os.path.isabs(codeDir): + codeDir = os.path.join(os.getcwd(), codeDir) + print_normal('Expand codeDir to %s' % codeDir) + nnictl_exp_config['trial']['codeDir'] = codeDir + archive_code_dir = os.path.join(temp_root_dir, 'code') + if os.path.exists(archive_code_dir): + file_list = os.listdir(archive_code_dir) + for file_name in file_list: + src_path = os.path.join(archive_code_dir, file_name) + target_path = os.path.join(codeDir, file_name) + if os.path.exists(target_path): + print_error('Copy %s failed, %s exist!' % (file_name, target_path)) + continue + if os.path.isdir(src_path): + shutil.copytree(src_path, target_path) + else: + shutil.copy(src_path, target_path) + + # Step5. 
Create experiment metadata + nni_config.set_config('experimentConfig', nnictl_exp_config) + experiment_config.add_experiment(experiment_id, + experiment_metadata.get('port'), + experiment_metadata.get('startTime'), + experiment_metadata.get('fileName'), + experiment_metadata.get('platform'), + experiment_metadata.get('experimentName'), + experiment_metadata.get('endTime'), + experiment_metadata.get('status')) + print_normal('Load experiment %s succsss!' % experiment_id) + + # Step6. Cleanup temp data + shutil.rmtree(temp_root_dir)