-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Support save and open experiments #2750
Changes from 26 commits
dcd2ffd
3b8b6fb
916e444
caeffb8
57c300e
65660e6
9376d6a
5fef3cf
5544ae8
f9fdfee
c5e26ef
10a04ba
60c888f
aa64fe6
0a42e9a
d68f73f
c6a5f8c
81facec
806ce57
2f3fb7a
dfdcf1f
576bfe9
aa708ca
0080891
4ada470
07ab608
e9d0ebc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,9 +18,9 @@ | |
from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response | ||
from .url_utils import trial_jobs_url, experiment_url, trial_job_id_url, export_data_url | ||
from .config_utils import Config, Experiments | ||
from .constants import NNICTL_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT, \ | ||
from .constants import NNICTL_HOME_DIR, NNI_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT, \ | ||
EXPERIMENT_MONITOR_INFO, TRIAL_MONITOR_HEAD, TRIAL_MONITOR_CONTENT, TRIAL_MONITOR_TAIL, REST_TIME_OUT | ||
from .common_utils import print_normal, print_error, print_warning, detect_process, get_yml_content | ||
from .common_utils import print_normal, print_error, print_warning, detect_process, get_yml_content, generate_temp_dir | ||
from .command_utils import check_output_command, kill_command | ||
from .ssh_utils import create_ssh_sftp_client, remove_remote_directory | ||
|
||
|
@@ -736,3 +736,165 @@ def search_space_auto_gen(args): | |
print_warning('Expected search space file \'{}\' generated, but not found.'.format(file_path)) | ||
else: | ||
print_normal('Generate search space done: \'{}\'.'.format(file_path)) | ||
|
||
def save_experiment(args): | ||
'''save experiment data to a zip file''' | ||
experiment_config = Experiments() | ||
experiment_dict = experiment_config.get_all_experiments() | ||
if args.id is None: | ||
print_error('Please set experiment id.') | ||
exit(1) | ||
if args.id not in experiment_dict: | ||
print_error('Cannot find experiment {0}.'.format(args.id)) | ||
exit(1) | ||
if experiment_dict[args.id].get('status') != 'STOPPED': | ||
print_error('Can only save stopped experiment!') | ||
exit(1) | ||
print_normal('Saving...') | ||
nni_config = Config(experiment_dict[args.id]['fileName']) | ||
logDir = os.path.join(NNI_HOME_DIR, args.id) | ||
if nni_config.get_config('logDir'): | ||
logDir = os.path.join(nni_config.get_config('logDir'), args.id) | ||
temp_root_dir = generate_temp_dir() | ||
|
||
# Step1. Copy logDir to temp folder | ||
if not os.path.exists(logDir): | ||
print_error('logDir: %s does not exist!' % logDir) | ||
exit(1) | ||
temp_experiment_dir = os.path.join(temp_root_dir, 'experiment') | ||
shutil.copytree(logDir, temp_experiment_dir) | ||
|
||
# Step2. Copy nnictl metadata to temp folder | ||
temp_nnictl_dir = os.path.join(temp_root_dir, 'nnictl') | ||
os.makedirs(temp_nnictl_dir, exist_ok=True) | ||
try: | ||
with open(os.path.join(temp_nnictl_dir, '.experiment'), 'w') as file: | ||
experiment_dict[args.id]['id'] = args.id | ||
json.dump(experiment_dict[args.id], file) | ||
except IOError: | ||
print_error('Write file to %s failed!' % os.path.join(temp_nnictl_dir, '.experiment')) | ||
exit(1) | ||
nnictl_config_dir = os.path.join(NNICTL_HOME_DIR, experiment_dict[args.id]['fileName']) | ||
shutil.copytree(nnictl_config_dir, os.path.join(temp_nnictl_dir, experiment_dict[args.id]['fileName'])) | ||
|
||
# Step3. Copy code dir | ||
if args.saveCodeDir: | ||
temp_code_dir = os.path.join(temp_root_dir, 'code') | ||
shutil.copytree(nni_config.get_config('experimentConfig')['trial']['codeDir'], temp_code_dir) | ||
|
||
# Step4. Archive folder | ||
zip_package_name = 'nni_experiment_%s' % args.id | ||
if args.path: | ||
os.makedirs(args.path, exist_ok=True) | ||
zip_package_name = os.path.join(args.path, zip_package_name) | ||
shutil.make_archive(zip_package_name, 'zip', temp_root_dir) | ||
print_normal('Save to %s.zip success!' % zip_package_name) | ||
|
||
# Step5. Cleanup temp data | ||
shutil.rmtree(temp_root_dir) | ||
|
||
def load_experiment(args): | ||
'''load experiment data''' | ||
package_path = os.path.expanduser(args.path) | ||
if not os.path.exists(args.path): | ||
print_error('file path %s does not exist!' % args.path) | ||
exit(1) | ||
temp_root_dir = generate_temp_dir() | ||
shutil.unpack_archive(package_path, temp_root_dir) | ||
print_normal('Loading...') | ||
# Step1. Validation | ||
if not os.path.exists(args.codeDir): | ||
print_error('Invalid: codeDir path does not exist!') | ||
exit(1) | ||
if args.logDir: | ||
if not os.path.exists(args.logDir): | ||
print_error('Invalid: logDir path does not exist!') | ||
exit(1) | ||
experiment_temp_dir = os.path.join(temp_root_dir, 'experiment') | ||
if not os.path.exists(os.path.join(experiment_temp_dir, 'db')): | ||
print_error('Invalid archive file: db file does not exist!') | ||
shutil.rmtree(temp_root_dir) | ||
exit(1) | ||
nnictl_temp_dir = os.path.join(temp_root_dir, 'nnictl') | ||
if not os.path.exists(os.path.join(nnictl_temp_dir, '.experiment')): | ||
print_error('Invalid archive file: nnictl metadata file does not exist!') | ||
shutil.rmtree(temp_root_dir) | ||
exit(1) | ||
try: | ||
with open(os.path.join(nnictl_temp_dir, '.experiment'), 'r') as file: | ||
experiment_metadata = json.load(file) | ||
except ValueError as err: | ||
print_error('Invalid nnictl metadata file: %s' % err) | ||
shutil.rmtree(temp_root_dir) | ||
exit(1) | ||
experiment_config = Experiments() | ||
experiment_dict = experiment_config.get_all_experiments() | ||
experiment_id = experiment_metadata.get('id') | ||
if experiment_id in experiment_dict: | ||
print_error('Invalid: experiment id already exist!') | ||
shutil.rmtree(temp_root_dir) | ||
exit(1) | ||
if not os.path.exists(os.path.join(nnictl_temp_dir, experiment_metadata.get('fileName'))): | ||
print_error('Invalid: experiment metadata does not exist!') | ||
shutil.rmtree(temp_root_dir) | ||
exit(1) | ||
|
||
# Step2. Copy nnictl metadata | ||
src_path = os.path.join(nnictl_temp_dir, experiment_metadata.get('fileName')) | ||
dest_path = os.path.join(NNICTL_HOME_DIR, experiment_metadata.get('fileName')) | ||
if os.path.exists(dest_path): | ||
shutil.rmtree(dest_path) | ||
shutil.copytree(src_path, dest_path) | ||
|
||
# Step3. Copy experiment data | ||
nni_config = Config(experiment_metadata.get('fileName')) | ||
nnictl_exp_config = nni_config.get_config('experimentConfig') | ||
if args.logDir: | ||
logDir = args.logDir | ||
nnictl_exp_config['logDir'] = logDir | ||
else: | ||
if nnictl_exp_config.get('logDir'): | ||
logDir = nnictl_exp_config['logDir'] | ||
else: | ||
logDir = NNI_HOME_DIR | ||
os.rename(os.path.join(temp_root_dir, 'experiment'), os.path.join(temp_root_dir, experiment_id)) | ||
src_path = os.path.join(os.path.join(temp_root_dir, experiment_id)) | ||
dest_path = os.path.join(os.path.join(logDir, experiment_id)) | ||
if os.path.exists(dest_path): | ||
shutil.rmtree(dest_path) | ||
shutil.copytree(src_path, dest_path) | ||
|
||
# Step4. Copy code dir | ||
codeDir = os.path.expanduser(args.codeDir) | ||
if not os.path.isabs(codeDir): | ||
codeDir = os.path.join(os.getcwd(), codeDir) | ||
print_normal('Expand codeDir to %s' % codeDir) | ||
nnictl_exp_config['trial']['codeDir'] = codeDir | ||
archive_code_dir = os.path.join(temp_root_dir, 'code') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why not copy entire 'code' dir using shutil.copytree? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
if os.path.exists(archive_code_dir): | ||
file_list = os.listdir(archive_code_dir) | ||
for file_name in file_list: | ||
src_path = os.path.join(archive_code_dir, file_name) | ||
target_path = os.path.join(codeDir, file_name) | ||
if os.path.exists(target_path): | ||
print_error('Copy %s failed, %s exist!' % (file_name, target_path)) | ||
continue | ||
if os.path.isdir(src_path): | ||
shutil.copytree(src_path, target_path) | ||
else: | ||
shutil.copy(src_path, target_path) | ||
|
||
# Step5. Create experiment metadata | ||
nni_config.set_config('experimentConfig', nnictl_exp_config) | ||
experiment_config.add_experiment(experiment_id, | ||
experiment_metadata.get('port'), | ||
experiment_metadata.get('startTime'), | ||
experiment_metadata.get('fileName'), | ||
experiment_metadata.get('platform'), | ||
experiment_metadata.get('experimentName'), | ||
experiment_metadata.get('endTime'), | ||
experiment_metadata.get('status')) | ||
print_normal('Open experiment %s succsss!' % experiment_id) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Open -> Load ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
|
||
# Step6. Cleanup temp data | ||
shutil.rmtree(temp_root_dir) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is it possible that the dir already existed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the dir name is generate by
random()
function, will not exist logically, I also add logic to handle this scenario, if the dir exist, nni will regenerate a new dir until it is not existed originally.