From 83fae834c4a09a323ada78a35c08d5b9cb8aa287 Mon Sep 17 00:00:00 2001 From: SparkSnail Date: Fri, 21 Jun 2019 13:35:14 +0700 Subject: [PATCH 1/2] Add versionCheck in config (#1187) --- docs/en_US/ExperimentConfig.md | 7 ++++++- tools/nni_cmd/config_schema.py | 1 + tools/nni_cmd/launcher.py | 5 ++++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/docs/en_US/ExperimentConfig.md b/docs/en_US/ExperimentConfig.md index 3459383306..39dcae37da 100644 --- a/docs/en_US/ExperimentConfig.md +++ b/docs/en_US/ExperimentConfig.md @@ -150,10 +150,15 @@ machineList: Note: The maxExecDuration spec set the time of an experiment, not a trial job. If the experiment reach the max duration time, the experiment will not stop, but could not submit new trial jobs any more. +* __versionCheck__ + * Description + + NNI will check the version of nniManager process and the version of trialKeeper in remote, pai and kubernetes platform. If you want to disable version check, you could set versionCheck to false. + * __debug__ * Description - NNI will check the version of nniManager process and the version of trialKeeper in remote, pai and kubernetes platform. If you want to disable version check, you could set debug be true. 
+ Debug mode will set versionCheck to False and set logLevel to 'debug' * __maxTrialNum__ * Description diff --git a/tools/nni_cmd/config_schema.py b/tools/nni_cmd/config_schema.py index 14070c7bd4..3136a1fcf1 100644 --- a/tools/nni_cmd/config_schema.py +++ b/tools/nni_cmd/config_schema.py @@ -56,6 +56,7 @@ def setPathCheck(key): Optional('nniManagerIp'): setType('nniManagerIp', str), Optional('logDir'): And(os.path.isdir, error=SCHEMA_PATH_ERROR % 'logDir'), Optional('debug'): setType('debug', bool), + Optional('versionCheck'): setType('versionCheck', bool), Optional('logLevel'): setChoice('logLevel', 'trace', 'debug', 'info', 'warning', 'error', 'fatal'), Optional('logCollection'): setChoice('logCollection', 'http', 'none'), 'useAnnotation': setType('useAnnotation', bool), diff --git a/tools/nni_cmd/launcher.py b/tools/nni_cmd/launcher.py index f04ea8f1f1..42e36be2f5 100644 --- a/tools/nni_cmd/launcher.py +++ b/tools/nni_cmd/launcher.py @@ -303,6 +303,9 @@ def set_experiment(experiment_config, mode, port, config_file_name): #debug mode should disable version check if experiment_config.get('debug') is not None: request_data['versionCheck'] = not experiment_config.get('debug') + #validate version check + if experiment_config.get('versionCheck') is not None: + request_data['versionCheck'] = experiment_config.get('versionCheck') if experiment_config.get('logCollection'): request_data['logCollection'] = experiment_config.get('logCollection') @@ -363,7 +366,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen exit(1) log_dir = experiment_config['logDir'] if experiment_config.get('logDir') else None log_level = experiment_config['logLevel'] if experiment_config.get('logLevel') else None - if log_level not in ['trace', 'debug'] and args.debug: + if log_level not in ['trace', 'debug'] and (args.debug or experiment_config.get('debug') is True): log_level = 'debug' # start rest server rest_process, start_time = start_rest_server(args.port, 
experiment_config['trainingServicePlatform'], mode, config_file_name, experiment_id, log_dir, log_level) From 150ee83aa5dbaff4e07a2c9c3c644a69997a8359 Mon Sep 17 00:00:00 2001 From: XupuWang <806218468@qq.com> Date: Fri, 21 Jun 2019 16:33:07 +0800 Subject: [PATCH 2/2] fix negative time number in local mode when trial time is short (#1185) * fix negative time number in local mode when trial time is short * fix bug of duration<0 * fix windows version and readme * change tab * change line --- docs/en_US/Trials.md | 2 +- docs/zh_CN/Trials.md | 4 ++-- .../training_service/local/localTrainingService.ts | 4 ++-- src/webui/src/components/overview/SuccessTable.tsx | 11 +++++++++-- src/webui/src/components/trial-detail/TableList.tsx | 3 ++- 5 files changed, 16 insertions(+), 8 deletions(-) diff --git a/docs/en_US/Trials.md b/docs/en_US/Trials.md index 5a39f03d86..017c48d222 100644 --- a/docs/en_US/Trials.md +++ b/docs/en_US/Trials.md @@ -144,7 +144,7 @@ export NNI_TRIAL_SEQ_ID=1 export MULTI_PHASE=false export CUDA_VISIBLE_DEVICES= eval python3 mnist.py 2>/home/user_name/nni/experiments/$experiment_id$/trials/$trial_id$/stderr -echo $? `date +%s000` >/home/user_name/nni/experiments/$experiment_id$/trials/$trial_id$/.nni/state +echo $? `date +%s%3N` >/home/user_name/nni/experiments/$experiment_id$/trials/$trial_id$/.nni/state ``` ### Other Modes diff --git a/docs/zh_CN/Trials.md b/docs/zh_CN/Trials.md index 993952c72a..475438c55a 100644 --- a/docs/zh_CN/Trials.md +++ b/docs/zh_CN/Trials.md @@ -149,7 +149,7 @@ export NNI_TRIAL_SEQ_ID=1 export MULTI_PHASE=false export CUDA_VISIBLE_DEVICES= eval python3 mnist.py 2>/home/user_name/nni/experiments/$experiment_id$/trials/$trial_id$/stderr -echo $? `date +%s000` >/home/user_name/nni/experiments/$experiment_id$/trials/$trial_id$/.nni/state +echo $? `date +%s%3N` >/home/user_name/nni/experiments/$experiment_id$/trials/$trial_id$/.nni/state ``` ### 其它模式 @@ -166,4 +166,4 @@ echo $? 
`date +%s000` >/home/user_name/nni/experiments/$experiment_id$/trials/$t * [为 CIFAR 10 分类找到最佳的 optimizer](Cifar10Examples.md) * [如何在 NNI 调优 SciKit-learn 的参数](SklearnExamples.md) * [在阅读理解上使用自动模型架构搜索。](SquadEvolutionExamples.md) -* [如何在 NNI 上调优 GBDT](GbdtExample.md) \ No newline at end of file +* [如何在 NNI 上调优 GBDT](GbdtExample.md) diff --git a/src/nni_manager/training_service/local/localTrainingService.ts b/src/nni_manager/training_service/local/localTrainingService.ts index 31a90db695..a4cdcd5328 100644 --- a/src/nni_manager/training_service/local/localTrainingService.ts +++ b/src/nni_manager/training_service/local/localTrainingService.ts @@ -507,12 +507,12 @@ class LocalTrainingService implements TrainingService { script.push( `cmd /c ${localTrailConfig.command} 2>${path.join(workingDirectory, 'stderr')}`, `$NOW_DATE = [int64](([datetime]::UtcNow)-(get-date "1/1/1970")).TotalSeconds`, - `$NOW_DATE = "$NOW_DATE" + "000"`, + `$NOW_DATE = "$NOW_DATE" + (Get-Date -Format fff).ToString()`, `Write $LASTEXITCODE " " $NOW_DATE | Out-File ${path.join(workingDirectory, '.nni', 'state')} -NoNewline -encoding utf8`); } else { script.push( `eval ${localTrailConfig.command} 2>${path.join(workingDirectory, 'stderr')}`, - `echo $? \`date +%s000\` >${path.join(workingDirectory, '.nni', 'state')}`); + `echo $? 
\`date +%s%3N\` >${path.join(workingDirectory, '.nni', 'state')}`); } return script; diff --git a/src/webui/src/components/overview/SuccessTable.tsx b/src/webui/src/components/overview/SuccessTable.tsx index 97108e11b3..18d7ee55a6 100644 --- a/src/webui/src/components/overview/SuccessTable.tsx +++ b/src/webui/src/components/overview/SuccessTable.tsx @@ -72,8 +72,15 @@ class SuccessTable extends React.Component { sorter: (a: TableObj, b: TableObj) => (a.duration as number) - (b.duration as number), render: (text: string, record: TableObj) => { let duration; - if (record.duration) { - duration = convertDuration(record.duration); + if (record.duration !== undefined) { + // duration is a negative number (-1) or between 0 and 1 + if (record.duration > 0 && record.duration < 1 || record.duration < 0) { + duration = `${record.duration}s`; + } else { + duration = convertDuration(record.duration); + } + } else { + duration = 0; } return (
{duration}
diff --git a/src/webui/src/components/trial-detail/TableList.tsx b/src/webui/src/components/trial-detail/TableList.tsx index 954a1cd1b5..448c237eb2 100644 --- a/src/webui/src/components/trial-detail/TableList.tsx +++ b/src/webui/src/components/trial-detail/TableList.tsx @@ -264,7 +264,8 @@ class TableList extends React.Component { render: (text: string, record: TableObj) => { let duration; if (record.duration !== undefined) { - if (record.duration > 0 && record.duration < 1) { + // duration is a negative number (-1) or between 0 and 1 + if (record.duration > 0 && record.duration < 1 || record.duration < 0) { duration = `${record.duration}s`; } else { duration = convertDuration(record.duration);