diff --git a/docs/en_US/Nnictl.md b/docs/en_US/Nnictl.md
index a0fe0b50c4..e7c6dbdc78 100644
--- a/docs/en_US/Nnictl.md
+++ b/docs/en_US/Nnictl.md
@@ -15,6 +15,7 @@ nnictl support commands:
* [nnictl trial](#trial)
* [nnictl top](#top)
* [nnictl experiment](#experiment)
+* [nnictl platform](#platform)
* [nnictl config](#config)
* [nnictl log](#log)
* [nnictl webui](#webui)
@@ -370,6 +371,26 @@ Debug mode will disable version check function in Trialkeeper.
nnictl experiment list
```
+* __nnictl experiment delete__
+
+ * Description
+
+ Delete one or all experiments, it includes log, result, environment information and cache. It uses to delete useless experiment result, or save disk space.
+
+ * Usage
+
+ ```bash
+ nnictl experiment delete [OPTIONS]
+ ```
+
+ * Options
+
+ |Name, shorthand|Required|Default|Description|
+ |------|------|------ |------|
+ |id| False| |ID of the experiment|
+
+
+
* __nnictl experiment export__
@@ -456,6 +477,32 @@ Debug mode will disable version check function in Trialkeeper.
nnictl experiment import [experiment_id] -f experiment_data.json
```
+
+![](https://placehold.it/15/1589F0/000000?text=+) `Manage platform information`
+
+* __nnictl platform clean__
+
+ * Description
+
+ It uses to clean up disk on a target platform. The provided YAML file includes the information of target platform, and it follows the same schema as the NNI configuration file.
+
+ * Note
+
+ if the target platform is being used by other users, it may cause unexpected errors to others.
+
+ * Usage
+
+ ```bash
+ nnictl platform clean [OPTIONS]
+ ```
+
+ * Options
+
+ |Name, shorthand|Required|Default|Description|
+ |------|------|------ |------|
+ |--config| True| |the path of yaml config file used when create an experiment|
+
+
![](https://placehold.it/15/1589F0/000000?text=+) `nnictl config show`
diff --git a/tools/nni_cmd/nnictl.py b/tools/nni_cmd/nnictl.py
index 286f643ab3..3fc908e1b6 100644
--- a/tools/nni_cmd/nnictl.py
+++ b/tools/nni_cmd/nnictl.py
@@ -121,6 +121,19 @@ def parse_args():
parser_experiment_list = parser_experiment_subparsers.add_parser('list', help='list all of running experiment ids')
parser_experiment_list.add_argument('all', nargs='?', help='list all of experiments')
parser_experiment_list.set_defaults(func=experiment_list)
+ parser_experiment_clean = parser_experiment_subparsers.add_parser('delete', help='clean up the experiment data')
+ parser_experiment_clean.add_argument('id', nargs='?', help='the id of experiment')
+ parser_experiment_clean.add_argument('--all', action='store_true', default=False, help='delete all of experiments')
+ parser_experiment_clean.set_defaults(func=experiment_clean)
+
+ #parse experiment command
+ parser_platform = subparsers.add_parser('platform', help='get platform information')
+ #add subparsers for parser_experiment
+ parser_platform_subparsers = parser_platform.add_subparsers()
+ parser_platform_clean = parser_platform_subparsers.add_parser('clean', help='clean up the platform data')
+ parser_platform_clean.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
+ parser_platform_clean.set_defaults(func=platform_clean)
+
#import tuning data
parser_import_data = parser_experiment_subparsers.add_parser('import', help='import additional data')
parser_import_data.add_argument('id', nargs='?', help='the id of experiment')
diff --git a/tools/nni_cmd/nnictl_utils.py b/tools/nni_cmd/nnictl_utils.py
index 94a3cb9f37..2333fd6c0b 100644
--- a/tools/nni_cmd/nnictl_utils.py
+++ b/tools/nni_cmd/nnictl_utils.py
@@ -24,6 +24,10 @@
import json
import datetime
import time
+import re
+from pathlib import Path
+from pyhdfs import HdfsClient, HdfsFileNotFoundException
+import shutil
from subprocess import call, check_output
from nni_annotation import expand_annotations
from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response
@@ -31,8 +35,9 @@
from .config_utils import Config, Experiments
from .constants import NNICTL_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT, \
EXPERIMENT_MONITOR_INFO, TRIAL_MONITOR_HEAD, TRIAL_MONITOR_CONTENT, TRIAL_MONITOR_TAIL, REST_TIME_OUT
-from .common_utils import print_normal, print_error, print_warning, detect_process
+from .common_utils import print_normal, print_error, print_warning, detect_process, get_yml_content
from .command_utils import check_output_command, kill_command
+from .ssh_utils import create_ssh_sftp_client, remove_remote_directory
def get_experiment_time(port):
'''get the startTime and endTime of an experiment'''
@@ -73,10 +78,11 @@ def update_experiment():
if status:
experiment_config.update_experiment(key, 'status', status)
-def check_experiment_id(args):
+def check_experiment_id(args, update=True):
'''check if the id is valid
'''
- update_experiment()
+ if update:
+ update_experiment()
experiment_config = Experiments()
experiment_dict = experiment_config.get_all_experiments()
if not experiment_dict:
@@ -170,7 +176,7 @@ def get_config_filename(args):
'''get the file name of config file'''
experiment_id = check_experiment_id(args)
if experiment_id is None:
- print_error('Please set the experiment id!')
+ print_error('Please set correct experiment id!')
exit(1)
experiment_config = Experiments()
experiment_dict = experiment_config.get_all_experiments()
@@ -180,7 +186,7 @@ def get_experiment_port(args):
'''get the port of experiment'''
experiment_id = check_experiment_id(args)
if experiment_id is None:
- print_error('Please set the experiment id!')
+ print_error('Please set correct experiment id!')
exit(1)
experiment_config = Experiments()
experiment_dict = experiment_config.get_all_experiments()
@@ -373,6 +379,166 @@ def webui_url(args):
nni_config = Config(get_config_filename(args))
print_normal('{0} {1}'.format('Web UI url:', ' '.join(nni_config.get_config('webuiUrl'))))
+def local_clean(directory):
+ '''clean up local data'''
+ print_normal('removing folder {0}'.format(directory))
+ try:
+ shutil.rmtree(directory)
+ except FileNotFoundError as err:
+ print_error('{0} does not exist!'.format(directory))
+
+def remote_clean(machine_list, experiment_id=None):
+ '''clean up remote data'''
+ for machine in machine_list:
+ passwd = machine.get('passwd')
+ userName = machine.get('username')
+ host = machine.get('ip')
+ port = machine.get('port')
+ if experiment_id:
+ remote_dir = '/' + '/'.join(['tmp', 'nni', 'experiments', experiment_id])
+ else:
+ remote_dir = '/' + '/'.join(['tmp', 'nni', 'experiments'])
+ sftp = create_ssh_sftp_client(host, port, userName, passwd)
+ print_normal('removing folder {0}'.format(host + ':' + str(port) + remote_dir))
+ remove_remote_directory(sftp, remote_dir)
+
+def hdfs_clean(host, user_name, output_dir, experiment_id=None):
+ '''clean up hdfs data'''
+ hdfs_client = HdfsClient(hosts='{0}:80'.format(host), user_name=user_name, webhdfs_path='/webhdfs/api/v1', timeout=5)
+ if experiment_id:
+ full_path = '/' + '/'.join([user_name, 'nni', 'experiments', experiment_id])
+ else:
+ full_path = '/' + '/'.join([user_name, 'nni', 'experiments'])
+ print_normal('removing folder {0} in hdfs'.format(full_path))
+ hdfs_client.delete(full_path, recursive=True)
+ if output_dir:
+ pattern = re.compile('hdfs://(?P([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(?P/.*)?')
+ match_result = pattern.match(output_dir)
+ if match_result:
+ output_host = match_result.group('host')
+ output_dir = match_result.group('baseDir')
+ #check if the host is valid
+ if output_host != host:
+ print_warning('The host in {0} is not consistent with {1}'.format(output_dir, host))
+ else:
+ if experiment_id:
+ output_dir = output_dir + '/' + experiment_id
+ print_normal('removing folder {0} in hdfs'.format(output_dir))
+ hdfs_client.delete(output_dir, recursive=True)
+
+def experiment_clean(args):
+ '''clean up the experiment data'''
+ experiment_id_list = []
+ experiment_config = Experiments()
+ experiment_dict = experiment_config.get_all_experiments()
+ if args.all:
+ experiment_id_list = list(experiment_dict.keys())
+ else:
+ if args.id is None:
+ print_error('please set experiment id!')
+ exit(1)
+ if args.id not in experiment_dict:
+ print_error('can not find id {0}!'.format(args.id))
+ exit(1)
+ experiment_id_list.append(args.id)
+ while True:
+ print('INFO: This action will delete experiment {0}, and it’s not recoverable.'.format(' '.join(experiment_id_list)))
+ inputs = input('INFO: do you want to continue?[y/N]:')
+ if not inputs.lower() or inputs.lower() in ['n', 'no']:
+ exit(0)
+ elif inputs.lower() not in ['y', 'n', 'yes', 'no']:
+ print_warning('please input Y or N!')
+ else:
+ break
+ for experiment_id in experiment_id_list:
+ nni_config = Config(experiment_dict[experiment_id]['fileName'])
+ platform = nni_config.get_config('experimentConfig').get('trainingServicePlatform')
+ experiment_id = nni_config.get_config('experimentId')
+ if platform == 'remote':
+ machine_list = nni_config.get_config('experimentConfig').get('machineList')
+ remote_clean(machine_list, experiment_id)
+ elif platform == 'pai':
+ host = nni_config.get_config('experimentConfig').get('paiConfig').get('host')
+ user_name = nni_config.get_config('experimentConfig').get('paiConfig').get('userName')
+ output_dir = nni_config.get_config('experimentConfig').get('trial').get('outputDir')
+ hdfs_clean(host, user_name, output_dir, experiment_id)
+ elif platform != 'local':
+ #TODO: support all platforms
+ print_warning('platform {0} clean up not supported yet!'.format(platform))
+ exit(0)
+ #clean local data
+ home = str(Path.home())
+ local_dir = nni_config.get_config('experimentConfig').get('logDir')
+ if not local_dir:
+ local_dir = os.path.join(home, 'nni', 'experiments', experiment_id)
+ local_clean(local_dir)
+ experiment_config = Experiments()
+ print_normal('removing metadata of experiment {0}'.format(experiment_id))
+ experiment_config.remove_experiment(experiment_id)
+ print_normal('Finish!')
+
+def get_platform_dir(config_content):
+ '''get the dir list to be deleted'''
+ platform = config_content.get('trainingServicePlatform')
+ dir_list = []
+ if platform == 'remote':
+ machine_list = config_content.get('machineList')
+ for machine in machine_list:
+ host = machine.get('ip')
+ port = machine.get('port')
+ dir_list.append(host + ':' + str(port) + '/tmp/nni/experiments')
+ elif platform == 'pai':
+ pai_config = config_content.get('paiConfig')
+ host = config_content.get('paiConfig').get('host')
+ user_name = config_content.get('paiConfig').get('userName')
+ output_dir = config_content.get('trial').get('outputDir')
+ dir_list.append('hdfs://{0}:9000/{1}/nni/experiments'.format(host, user_name))
+ if output_dir:
+ dir_list.append(output_dir)
+ return dir_list
+
+def platform_clean(args):
+ '''clean up the experiment data'''
+ config_path = os.path.abspath(args.config)
+ if not os.path.exists(config_path):
+ print_error('Please set correct config path!')
+ exit(1)
+ config_content = get_yml_content(config_path)
+ platform = config_content.get('trainingServicePlatform')
+ if platform not in ['remote', 'pai']:
+ print_normal('platform {0} not supported!'.format(platform))
+ exit(0)
+ experiment_config = Experiments()
+ experiment_dict = experiment_config.get_all_experiments()
+ update_experiment()
+ id_list = list(experiment_dict.keys())
+ dir_list = get_platform_dir(config_content)
+ if not dir_list:
+ print_normal('No folder of NNI caches is found!')
+ exit(1)
+ while True:
+ print_normal('This command will remove below folders of NNI caches. If other users are using experiments on below hosts, it will be broken.')
+ for dir in dir_list:
+ print(' ' + dir)
+ inputs = input('INFO: do you want to continue?[y/N]:')
+ if not inputs.lower() or inputs.lower() in ['n', 'no']:
+ exit(0)
+ elif inputs.lower() not in ['y', 'n', 'yes', 'no']:
+ print_warning('please input Y or N!')
+ else:
+ break
+ if platform == 'remote':
+ machine_list = config_content.get('machineList')
+ for machine in machine_list:
+ remote_clean(machine_list, None)
+ elif platform == 'pai':
+ pai_config = config_content.get('paiConfig')
+ host = config_content.get('paiConfig').get('host')
+ user_name = config_content.get('paiConfig').get('userName')
+ output_dir = config_content.get('trial').get('outputDir')
+ hdfs_clean(host, user_name, output_dir, None)
+ print_normal('Done!')
+
def experiment_list(args):
'''get the information of all experiments'''
experiment_config = Experiments()
@@ -393,7 +559,6 @@ def experiment_list(args):
print_warning('There is no experiment running...\nYou can use \'nnictl experiment list all\' to list all stopped experiments!')
experiment_information = ""
for key in experiment_id_list:
-
experiment_information += (EXPERIMENT_DETAIL_FORMAT % (key, experiment_dict[key]['status'], experiment_dict[key]['port'],\
experiment_dict[key].get('platform'), experiment_dict[key]['startTime'], experiment_dict[key]['endTime']))
print(EXPERIMENT_INFORMATION_FORMAT % experiment_information)
diff --git a/tools/nni_cmd/ssh_utils.py b/tools/nni_cmd/ssh_utils.py
index 550e64bea5..da707dac48 100644
--- a/tools/nni_cmd/ssh_utils.py
+++ b/tools/nni_cmd/ssh_utils.py
@@ -57,3 +57,17 @@ def create_ssh_sftp_client(host_ip, port, username, password):
return sftp
except Exception as exception:
print_error('Create ssh client error %s\n' % exception)
+
+def remove_remote_directory(sftp, directory):
+ '''remove a directory in remote machine'''
+ try:
+ files = sftp.listdir(directory)
+ for file in files:
+ filepath = '/'.join([directory, file])
+ try:
+ sftp.remove(filepath)
+ except IOError:
+ remove_remote_directory(sftp, filepath)
+ sftp.rmdir(directory)
+ except IOError as err:
+ print_error(err)
\ No newline at end of file